-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathLR1.cpp
More file actions
483 lines (418 loc) · 15.9 KB
/
LR1.cpp
File metadata and controls
483 lines (418 loc) · 15.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
#include "LR1.h"
#include <iostream>
#include "hash_combine.h"
#include "AnalysisError.h"
#include <sstream>
std::size_t LR1Item::Hash::operator()(const LR1Item &obj) const
{
std::size_t ans = 0;
ans = my_hash_combine(ans, std::hash<int>()(obj.position));
ans = my_hash_combine(ans, std::hash<bool>()(obj.accepted.includedEmpty));
for (const auto s : obj.accepted.symbols) {
ans = my_hash_combine(ans, s->hash());
}
ans = my_hash_combine(ans, obj.productor->hash());
return ans;
}
inline bool LR1Item::isReduceState() const
{
return position == productor->length();
}
std::vector<LR1Item> LR1Item::closure(const ProductorList &productors) const
{
if (isReduceState()) return {};
std::vector<LR1Item> ans;
SymbolSet acc = {};
auto iter = productor->right.begin() + position + 1;
for (; iter < productor->right.end(); iter++) {
acc += productors.firstOf(*iter);
if (!productors.canDeriveEmpty(*iter)) break;
}
if (iter == productor->right.end()) //右侧全空
acc += accepted; //继承本条
auto s = productor->right.begin() + position;
for (; s < productor->right.end(); s++) {
if ((*s)->isVt())break;
auto prod = productors.queryLeftIs(*s);
for (auto const p : prod) { // 考虑所有产生式左部为下一元素的产生式
ans.push_back({ p, 0, acc });
}
if (!productors.canDeriveEmpty(*s)) break; // 如果该符号不可能空,则推导到此
}
return ans;
}
bool LR1Item::operator==(const LR1Item &other) const
{
return *productor == *other.productor && position == other.position && accepted == other.accepted;
}
const Symbol *LR1Item::getShiftInSymbol() const
{
if (isReduceState())
throw std::exception("Reduce state doesnt have shift in symbol.");
return (*productor)[position];
}
LR1Item LR1Item::getItemAfterShift() const{
if(isReduceState())
throw std::exception("Reduce state cannot shift in.");
LR1Item ans(*this);
ans.position++;
return ans;
}
bool LR1Item::operator<(const LR1Item &other) const
{
Hash hash;
return hash(*this) < hash(other); //直接比较哈希值的大小
}
bool LR1ItemSet::included(const LR1Item &item)
{
return itemSet.find(item) != itemSet.end();
}
int LR1ItemSet::insertIfNotInclude(const std::vector<LR1Item> &item)
{
int insertedNum = 0;
for (const auto &i : item) {
if (included(i)) continue;
itemSet.insert(i);
insertedNum++;
}
return insertedNum;
}
SymbolSet LR1ItemSet::queryAllShiftInSymbol() const{
SymbolSet ans;
for (const auto &i : itemSet) {
if (i.isReduceState()) continue;
ans.symbols.insert(i.getShiftInSymbol());
}
return ans;
}
std::list<LR1Item> LR1ItemSet::queryShiftInItem() const
{
std::list<LR1Item> ans;
for (const auto &i : itemSet) {
if (i.isReduceState()) continue;
ans.push_back(i);
}
return ans;
}
std::vector<LR1Item> LR1ItemSet::queryItemAfterShift(const Symbol *s) const
{
std::vector<LR1Item> ans;
for (const auto &i : itemSet) {
if (i.isReduceState()) continue;
if (!i.getShiftInSymbol()->match(s)) continue;
ans.push_back(i.getItemAfterShift());
}
return ans;
}
std::vector<LR1Item> LR1ItemSet::queryNonEmptyReduceItem(const Symbol *s) const
{
std::vector<LR1Item> ans;
for (const auto &i : itemSet) {
if (!i.isReduceState()) continue;
if (!i.accepted.matchIncluded(s)) continue;
if (i.productor->isEmpty()) continue;
ans.push_back(i);
}
return ans;
}
std::vector<LR1Item> LR1ItemSet::queryEmptyReduceItem(const Symbol *s) const
{
std::vector<LR1Item> ans;
for (const auto &i : itemSet) {
if (!i.isReduceState()) continue;
if (!i.accepted.matchIncluded(s)) continue;
if (!i.productor->isEmpty()) continue;
ans.push_back(i);
}
return ans;
}
LR1ItemSet LR1ItemSet::from(std::vector<LR1Item> &items, const ProductorList &productorList) {
LR1ItemSet ans{ };
for (const auto &i : items)
ans.itemSet.insert(i);
while (true) {
int insertedNum = 0;
std::vector<LR1Item> closureBuf;
for (const auto &i : ans.itemSet) {
auto a = i.closure(productorList);
closureBuf.insert(closureBuf.end(), a.begin(), a.end());
}
int newAddedNum = ans.insertIfNotInclude(closureBuf);
if (newAddedNum == 0) break;
}
return ans;
}
bool LR1ItemSet::operator==(const LR1ItemSet &other) const
{
return itemSet == other.itemSet;
}
bool LR1DFA::isItemSetPresent(const LR1ItemSet &s)
{
for (const auto &state : stateList) {
if (state.itemSet == s) return true;
}
return false;
}
const LR1ItemSet *LR1DFA::findItemSetInStateList(const LR1ItemSet &s) const
{
for (const auto &state : stateList) {
if (state.itemSet == s) return &state.itemSet;
}
return nullptr;
}
const LR1DFAState *LR1DFA::findDFAStateByItemSet(const LR1ItemSet &s) const
{
for (const auto &state : stateList) {
if (state.itemSet == s) return &state;
}
return nullptr;
}
LR1DFA::LR1DFA(LR1Item &start, const ProductorList &productorList){
{ // init
std::vector<LR1Item> startVect{ start };
LR1DFAState initialState = { LR1ItemSet::from(startVect, productorList) , {} };
auto shiftInSym = initialState.itemSet.queryAllShiftInSymbol(); //FIXME 移进符号可能有重复的
for (const auto s : shiftInSym.symbols) {
initialState.shiftInAction.push_back({ s, nullptr });
}
stateList.push_back(initialState);
}
for (auto &state : stateList) { //遍历状态列表中的每一个元素
for (auto &action : state.shiftInAction) { //遍历状态中的每一个移进动作
if (action.next != nullptr) continue;
auto shiftInItemsTmp = state.itemSet.queryItemAfterShift(action.sym); //如果移进的跳转状态未计算
LR1ItemSet shiftInItemSet = LR1ItemSet::from(shiftInItemsTmp, productorList); //生成移进后的LR1项目集合
const LR1DFAState *nextState = findDFAStateByItemSet(shiftInItemSet);
if (nextState) { // 如果该状态存在于状态列表中, 则直接将该状态赋值给移进动作
action.next = nextState;
continue;
}
LR1DFAState newDFAState = { shiftInItemSet , {} };
auto shiftInSym = shiftInItemSet.queryAllShiftInSymbol();
for (const auto s : shiftInSym.symbols)
newDFAState.shiftInAction.push_back({ s, nullptr });
stateList.push_back(newDFAState);
action.next = &stateList.back();
}
}
}
int LR1DFA::indexOfState(const LR1DFAState &state) const
{
int index = 0;
for (const auto &s : stateList) {
if (&s == &state) return index;
index++;
}
return -1;
}
bool LR1DFA::shiftIn(SyntaxTree &sym, const LR1DFAState *next)
{
//std::cout << "Shift in " << *sym.symbol() << ", move to I" << indexOfState(*next) << std::endl << std::endl;
symbolStack.push(sym);
stateStack.push(next);
return true;
}
bool LR1DFA::reduce(const Productor *productor)
{
//std::cout << "Reduce by " << *productor << std::endl << std::endl;
SyntaxTree ntree(*productor->left);
for (int n = 0; n < productor->length(); n++) {
ntree.leaf.push_front(std::move(symbolStack.top()));
symbolStack.pop();
stateStack.pop();
}
tempSymbolInput.emplace(ntree);
return true;
}
struct AnalysisFailException {
};
void LR1DFA::throwError(const std::vector<Token *> &token, std::vector<Token *>::const_iterator tIter, const LR1DFAState *lastState){
while (tIter < token.end()-1 && ( (*tIter)->getType() == Token::Type::Whitespace || (*tIter)->getType() == Token::Type::Linewrap)) {
tIter++;
};
std::ostringstream verbose;
if (lastState != nullptr) {
SymbolSet accept = lastState->itemSet.queryAllShiftInSymbol();
verbose << "Expect ";
for (const auto &p : lastState->itemSet.itemSet) {
if (!p.isReduceState()) continue;
accept += p.accepted;
}
for (const auto &s : accept.symbols) {
if (s->isVt())
verbose << *s;
}
if (accept.symbols.size() == 0) verbose << "Nothing";
}
throw AnalysisError(token, tIter, "Can not parse token", verbose.str());
}
SyntaxTree LR1DFA::parse(const std::vector<Token *> &tokenList)
{
{ //复位符号栈和状态栈
auto emptySymbolStack = std::stack<SyntaxTree>();
symbolStack.swap(emptySymbolStack); //清空符号栈
symbolStack.push(Vt::End); //符号栈中推入初始终结符
auto emptyStateStack = std::stack<const LR1DFAState *>();
stateStack.swap(emptyStateStack); //清空状态栈
stateStack.push(&stateList.front());//状态栈推入状态0
auto emptyBackTraceStack = std::stack<BackTrackItem>();
backTrackStack.swap(emptyBackTraceStack);
tempSymbolInput.reset();
}
auto tokenIter = tokenList.begin();
auto tokenIterWaterMark = tokenIter;
std::size_t stateStackSizeWaterMark = stateStack.size();
const LR1DFAState *stateStackWaterMark = stateStack.top();
auto checkNextSymbol = [&]() -> SyntaxTree {
if (tempSymbolInput.has_value()) return tempSymbolInput.value();
while ((*tokenIter)->getType() == Token::Type::Whitespace || (*tokenIter)->getType() == Token::Type::Linewrap) tokenIter++;
return SyntaxTree(Vt(*tokenIter));
};
auto takeNextSymbol = [&]() -> SyntaxTree {
if (tempSymbolInput.has_value()) {
SyntaxTree ans = tempSymbolInput.value();
tempSymbolInput.reset();
return ans;
}
while ((*tokenIter)->getType() == Token::Type::Whitespace || (*tokenIter)->getType() == Token::Type::Linewrap) tokenIter++;
return SyntaxTree(Vt(*(tokenIter++)));
};
auto hasNextSymbol = [&]() -> bool {
return tempSymbolInput.has_value() || tokenIter != tokenList.end();
};
auto printNextSymbol = [&](const Symbol *s) {
std::cout << "Next Symbol is " << *s;
if (s->isVt())
std::cout << " at line " << s->toVt()->token->getLine();
std::cout << std::endl;
};
ActionList actionList;
while (true) {
Vt _streamNext(tokenIter < tokenList.end() ? *tokenIter : tokenList[0]);
if (symbolStack.size() == 1 && Vt::End.match(symbolStack.top().symbol())
&& tempSymbolInput.has_value() && Vn::Start.match( tempSymbolInput.value().symbol())
&& tokenIter < tokenList.end() && Vt::End.match(&_streamNext))
return tempSymbolInput.value();
if (hasNextSymbol()) {
actionList = stateStack.top()->qureyActionList(checkNextSymbol().symbol());
//printNextSymbol(checkNextSymbol().symbol());
//std::cout << "Current state is I" << indexOfState(*stateStack.top()) << std::endl;
}
if (actionList.size() == 0 || !hasNextSymbol()) { //回溯
//std::cout << std::endl << "No avaliable action! Rollback to previous state..." << std::endl;
if (tokenIter > tokenIterWaterMark) {
tokenIterWaterMark = tokenIter;
stateStackWaterMark = stateStack.top();
}
while (true) {
if (backTrackStack.empty())
throwError(tokenList, tokenIterWaterMark, stateStackWaterMark);
if (backTrackStack.top().actionList.hasNext()) break;
backTrackStack.pop();
}
stateStack = backTrackStack.top().stateStack;
symbolStack = backTrackStack.top().symbolStack;
actionList = backTrackStack.top().actionList;
tokenIter = backTrackStack.top().tokenIter;
tempSymbolInput = backTrackStack.top().tempSymbolInput;
backTrackStack.pop();
//printNextSymbol(checkNextSymbol().symbol());
//std::cout << "Current state is I" << indexOfState(*stateStack.top()) << std::endl;
}
Action &action = actionList.take();
if (actionList.size() > 1) { //冲突
backTrackStack.push({ stateStack, symbolStack, actionList, tokenIter, tempSymbolInput }); // 保存状态
//std::cout << "Mutli action found: " << std::endl;
//int index = 1;
//for (const auto a : actionList.actions) {
// std::cout << " " << index << ". ";
// if (a.type == Action::Type::ShiftIn) std::cout << "Shift in to I" << indexOfState(*a.action.shiftInState);
// else std::cout << "Reduce by " << *a.action.reduceProductor;
// std::cout << std::endl;
// index++;
//}
//std::cout << " Trying " << actionList.index << std::endl;
}
if (action.type == Action::Type::ShiftIn) {
auto s = takeNextSymbol();
shiftIn(s,action.action.shiftInState);
}
else if (action.type == Action::Type::Reduce) {
reduce(action.action.reduceProductor);
}
}
}
std::ostream &operator<<(std::ostream &out, const LR1Item &item)
{
out << *item.productor->left << " -> ";
if (item.productor->isEmpty())
out << "ε ●";
else {
int index = 0;
for (const auto s : item.productor->right) {
if (index == item.position)
out << " ● ";
out << *s ;
index++;
}
if(item.position == item.productor->length())
out << " ●";
}
out << ", ";
for (const auto i : item.accepted.symbols)
out << *i ;
if (item.accepted.includedEmpty)
out << "ε";
return out;
}
std::ostream &operator<<(std::ostream &out, const LR1DFA &dfa)
{
for (const auto &s : dfa.stateList) {
int currIndex = dfa.indexOfState(s);
out << Style(Style::Foreground::LightCyan, Style::Format::Bold) << "■ [-- I" << currIndex << " --]:" << Style() << std::endl;
bool hasShiftInItem = false;
out << Style(Style::Foreground::Cyan, Style::Format::Bold) << " Shift in items of I" << currIndex << ':' << Style() << std::endl;
for (const auto &i : s.itemSet.itemSet) {
if (i.isReduceState()) continue;
hasShiftInItem = true;
out << " " << i << std::endl;
}
if (!hasShiftInItem) out << " No shift in item." << std::endl;
out << Style(Style::Foreground::Cyan, Style::Format::Bold) << " Shift in action of I" << currIndex << ':' << Style() << std::endl;
for (const auto &a : s.shiftInAction) {
out << " " << *a.sym << " to " << 'I' << dfa.indexOfState(*a.next) << std::endl;
}
if (!hasShiftInItem) out << " No shift in action." << std::endl;
out << Style(Style::Foreground::Cyan, Style::Format::Bold) << " Reduce items of I" << currIndex << ':' << Style() << std::endl;
bool hasReduceItem = false;
for (const auto &i : s.itemSet.itemSet) {
if (!i.isReduceState()) continue;
hasReduceItem = true;
out << " " << i << std::endl;
}
if (!hasReduceItem) out << " No reduce item." << std::endl;
out << std::endl;
}
return out;
}
std::list<const LR1DFAState *> LR1DFAState::queryShiftInStateBySymbol(const Symbol *s) const
{
std::list<const LR1DFAState *> ans = {};
for (const auto &action : shiftInAction) {
if (action.sym->match(s)) ans.push_back(action.next);
}
return ans;
}
ActionList LR1DFAState::qureyActionList(const Symbol *s) const{
ActionList ans;
auto shiftIn = queryShiftInStateBySymbol(s);
for (auto const &a : shiftIn)
ans.actions.push_back(a);
auto nonEmptyReduce = itemSet.queryNonEmptyReduceItem(s);
for (auto const &a : nonEmptyReduce)
ans.actions.push_back(a.productor);
auto emptyReduce = itemSet.queryEmptyReduceItem(s);
for (auto const &a : emptyReduce)
ans.actions.push_back(a.productor);
return ans;
}