TokenParser.cc
Go to the documentation of this file.00001 #include "Kodu/Parsing/Parser.h"
00002
00003 namespace Kodu {
00004
00005 bool Parser::TokenParser::parseTokens(const std::vector<std::string>& koduStrings,
00006 std::vector<ParsedPage*>& parsedPages)
00007 {
00008 const std::string kIndentationMarker = ":";
00009 unsigned int currentPageIndex = 0;
00010 bool isFirstEntryInCode = true;
00011
00012
00013 initializeKeywordSet();
00014
00015
00016
00017 for (unsigned int pageNumb = 1; pageNumb <= 12; pageNumb++)
00018 parsedPages.push_back(new ParsedPage(pageNumb));
00019
00020
00021
00022 const std::size_t kKoduStringsSize = koduStrings.size();
00023
00024 for (std::size_t index = 0; index < kKoduStringsSize; index++) {
00025
00026 std::stringstream lineNumber;
00027 lineNumber << "Line " << (index + 1) << ": ";
00028
00029
00030
00031 std::vector<TokenBase*> ruleTokens;
00032
00033
00034
00035
00036 PARSER_ASSERT((tokenize(koduStrings[index], ruleTokens) == true),
00037 errorMessage << lineNumber << "There was an error tokenizing this line (see above).");
00038
00039
00040
00041 if (ruleTokens.empty())
00042 continue;
00043
00044
00045 PARSER_ASSERT(ruleTokens[0]->isKeywordToken(),
00046 errorMessage << lineNumber << "The first word of every line should not be "
00047 << "a Kodu keyword (except blank/empty lines).");
00048
00049
00050 ParsedPage* currPageParsing = parsedPages[currentPageIndex];
00051 ParsedRule* tempRule;
00052
00053
00054
00055
00056 if (ruleTokens[0]->getKeywordData() == "PAGE") {
00057
00058
00059 if (isFirstEntryInCode) {
00060 isFirstEntryInCode = false;
00061 }
00062
00063
00064 PARSER_ASSERT((static_cast<int>(ruleTokens.size()) == 2),
00065 errorMessage << lineNumber << "New page lines should only "
00066 << "have the PAGE identifier and a number from 1 - 12.");
00067
00068
00069 PARSER_ASSERT(ruleTokens[1]->isNumericToken(),
00070 errorMessage << lineNumber << "The page identifier is not a number.");
00071
00072
00073 unsigned int pageId = static_cast<int>(ruleTokens[1]->getNumericData());
00074 PARSER_ASSERT((1 <= pageId && pageId <= 12),
00075 errorMessage << lineNumber << "Page number must be 1 through 12.");
00076
00077
00078 currentPageIndex = pageId - 1;
00079 PARSER_ASSERT((parsedPages[currentPageIndex]->getRuleCount() == 0),
00080 errorMessage << lineNumber << "This page already has rules. "
00081 << "You cannot add rules to this page again.");
00082
00083 tempRule = NULL;
00084 currPageParsing = NULL;
00085
00086
00087 GeneralFncs::destroyAllPtrsInVector(ruleTokens);
00088
00089
00090
00091 continue;
00092 }
00093
00094
00095 if (ruleTokens[0]->getKeywordData() == kIndentationMarker) {
00096
00097
00098 PARSER_ASSERT((!isFirstEntryInCode),
00099 errorMessage << lineNumber << "A PAGE declaration must be the "
00100 << "first thing in the file.");
00101
00102
00103 PARSER_ASSERT((currPageParsing->getRuleCount() > 0),
00104 errorMessage << lineNumber << "This is the first rule on the page. "
00105 << "It should not be indented.");
00106
00107
00108
00109 unsigned int indentLvlCount = 1;
00110
00111
00112
00113 GeneralFncs::destroyPtrInVector(ruleTokens, 0);
00114
00115 while (indentLvlCount < ruleTokens.size()) {
00116
00117 PARSER_ASSERT((ruleTokens.size() >= 2),
00118 errorMessage << lineNumber << "This line does not seem to have the "
00119 << "WHEN and DO identifiers.");
00120
00121
00122 PARSER_ASSERT(ruleTokens[0]->isKeywordToken(),
00123 errorMessage << lineNumber << "Token #" << (indentLvlCount + 1)
00124 << " is not valid. It should be either a '" << kIndentationMarker
00125 << "' marker or a WHEN identifier.");
00126
00127
00128 if (ruleTokens[0]->getKeywordData() == "WHEN") {
00129
00130 break;
00131 }
00132
00133
00134 PARSER_ASSERT((ruleTokens[0]->getKeywordData() == kIndentationMarker),
00135 errorMessage << lineNumber << "Token #" << (indentLvlCount + 1)
00136 << " should be a '" << kIndentationMarker << "' or a WHEN.");
00137
00138
00139 PARSER_ASSERT((ruleTokens[0]->getKeywordData().size() == 1),
00140 errorMessage << lineNumber << "Token #" << (indentLvlCount + 1)
00141 << " should only have a length of " << kIndentationMarker.size() << ".");
00142
00143
00144 indentLvlCount++;
00145
00146
00147
00148 GeneralFncs::destroyPtrInVector(ruleTokens, 0);
00149 }
00150
00151
00152 unsigned int prevRuleIndentCount =
00153 currPageParsing->getRuleInPos(currPageParsing->getRuleCount() - 1)->
00154 getIndentationLevel();
00155
00156
00157
00158 PARSER_ASSERT((indentLvlCount <= prevRuleIndentCount + 1),
00159 errorMessage << lineNumber << "There is(are) too many indentation(s) on this line. "
00160 << "Max allowable is either " << prevRuleIndentCount
00161 << " or " << (prevRuleIndentCount + 1) << " for this line. "
00162 << "This line has " << indentLvlCount << ".");
00163
00164
00165 tempRule = new ParsedRule();
00166 tempRule->setIndentationLevel(indentLvlCount);
00167
00168
00169 for (int ruleIndex = (currPageParsing->getRuleCount() - 1); ruleIndex >= 0; ruleIndex--)
00170 {
00171 if (currPageParsing->getRuleInPos(ruleIndex)->getIndentationLevel() == indentLvlCount - 1)
00172 {
00173 tempRule->setParentNumber(ruleIndex + 1);
00174 break;
00175 }
00176 }
00177
00178
00179 }
00180
00181
00182 if (ruleTokens[0]->getKeywordData() == "WHEN") {
00183
00184
00185 PARSER_ASSERT((!isFirstEntryInCode),
00186 errorMessage << lineNumber << "A PAGE declaration must be the "
00187 << "first thing in the document.");
00188
00189
00190 if (tempRule == NULL)
00191 tempRule = new ParsedRule();
00192
00193
00194 int doPos = 0;
00195 PARSER_ASSERT(((doPos = contains(ruleTokens, "DO")) > 0),
00196 errorMessage << lineNumber << "Cannot find the DO identifier on this line.");
00197
00198
00199 std::vector<TokenBase*> conditionTokens, actionTokens;
00200 conditionTokens = GeneralFncs::subVector(ruleTokens, 1, doPos);
00201 actionTokens = GeneralFncs::subVector(ruleTokens, doPos + 1, ruleTokens.size());
00202
00203
00204 ParsedPhrase* condition = new ParsedPhrase();
00205 ParsedPhrase* action = new ParsedPhrase();
00206
00207
00208 if (!conditionTokens.empty()) {
00209 condition->setPhraseHead(conditionTokens[0]);
00210 condition->setPhraseModifiers(GeneralFncs::subVector(conditionTokens, 1,
00211 conditionTokens.size()));
00212 } else {
00213 condition->setPhraseHead(new KeywordToken("always"));
00214 }
00215
00216
00217 if (!actionTokens.empty()) {
00218 action->setPhraseHead(actionTokens[0]);
00219 action->setPhraseModifiers(GeneralFncs::subVector(actionTokens, 1, actionTokens.size()));
00220 } else {
00221 action->setPhraseHead(new KeywordToken("do_nothing"));
00222 }
00223
00224
00225 tempRule->setRuleNumber(currPageParsing->getRuleCount() + 1);
00226 tempRule->setConditionPhrase(condition);
00227 tempRule->setActionPhrase(action);
00228
00229
00230 currPageParsing->addRule(tempRule);
00231
00232
00233 condition = NULL;
00234 action = NULL;
00235 tempRule = NULL;
00236 currPageParsing = NULL;
00237
00238
00239 GeneralFncs::destroyPtrInVector(ruleTokens, doPos);
00240 GeneralFncs::destroyPtrInVector(ruleTokens, 0);
00241
00242
00243
00244 continue;
00245 }
00246
00247
00248 PARSER_ASSERT(ERROR,
00249 errorMessage << lineNumber << "(Col 1) Unknown token. "
00250 << "It does not begin with PAGE, indentation markers '"
00251 << kIndentationMarker << "', a WHEN identifier, or whitespace (tabs, spaces, etc.).");
00252 }
00253
00254
00255 clearKeywordSet();
00256 return true;
00257 }
00258
00259 bool Parser::TokenParser::readText(std::vector<std::string>& koduStrings) {
00260 const char* fileName = "my.kode";
00261 std::cout << "============ Reading Kodu code from " << fileName << " ============\n";
00262 std::string line;
00263 std::ifstream kodufile(fileName);
00264 if (kodufile.is_open()) {
00265 while (kodufile.good()) {
00266 getline(kodufile, line);
00267 koduStrings.push_back(line);
00268 }
00269 kodufile.close();
00270 std::cout << "============ Reading Complete ============\n";
00271 } else {
00272 std::cerr << "Error reading the file.\n";
00273 return false;
00274 }
00275 return true;
00276 }
00277
00278 int Parser::TokenParser::contains(const std::vector<TokenBase*>& tokens, const std::string& searchItem) {
00279 const int kSize = tokens.size();
00280 for (int i = 0; i < kSize; i++) {
00281 if (tokens[i]->isKeywordToken() && tokens[i]->getKeywordData() == searchItem)
00282 return i;
00283 }
00284 return -1;
00285 }
00286 }