TokenParser.cc

Go to the documentation of this file.
00001 #include "Kodu/Parsing/Parser.h"
00002 
00003 namespace Kodu {
00004         
00005     bool Parser::TokenParser::parseTokens(const std::vector<std::string>& koduStrings,
00006         std::vector<ParsedPage*>& parsedPages)
00007     {
00008         const std::string kIndentationMarker = ":";
00009         unsigned int currentPageIndex = 0;          // states which page is currently being parsed
00010         bool isFirstEntryInCode = true;             // used to make sure the first line is a PAGE declaration
00011 
00012         // intialize the set container that contains all the keyowrds for Kodu language (that we are using)
00013         initializeKeywordSet();
00014 
00015         // preconstruct 12 empty pages
00016         // std::cout << "Constructing the 12 temp pages...";
00017         for (unsigned int pageNumb = 1; pageNumb <= 12; pageNumb++)
00018             parsedPages.push_back(new ParsedPage(pageNumb));
00019         // std::cout << "done.\n";
00020 
00021         // main parsing loop
00022         const std::size_t kKoduStringsSize = koduStrings.size();
00023         // std::cout << "Parsing tokens...\n";
00024         for (std::size_t index = 0; index < kKoduStringsSize; index++) {
00025             // create error header (to print on stdout)
00026             std::stringstream lineNumber;
00027             lineNumber << "Line " << (index + 1) << ": ";
00028             //std::string lineNumber = header.str();
00029 
00030             // tokenize the current line
00031             std::vector<TokenBase*> ruleTokens;
00032             
00033             // convert rule into tokens
00034             // ASSERTIONS: tokenizing process will be successful
00035             // std::cout << "Creating tokens from string...\n";
00036             PARSER_ASSERT((tokenize(koduStrings[index], ruleTokens) == true),
00037                 errorMessage << lineNumber << "There was an error tokenizing this line (see above).");
00038             // std::cout << "Tokens created.\n";
00039 
00040             // make sure this line has at least two tokens or each move on
00041             if (ruleTokens.empty())
00042                 continue;
00043 
00044             // ASSERTION: the first token on every line should be a string
00045             PARSER_ASSERT(ruleTokens[0]->isKeywordToken(),
00046                 errorMessage << lineNumber << "The first word of every line should not be "
00047                 << "a Kodu keyword (except blank/empty lines).");
00048 
00049             // reference the current page
00050             ParsedPage* currPageParsing = parsedPages[currentPageIndex];
00051             ParsedRule* tempRule;
00052 
00053 // ======================================= Page Identifier Check ====================================== //
00054             // checks the first char of the first token
00055             // check if it is a page
00056             if (ruleTokens[0]->getKeywordData() == "PAGE") {
00057               // std::cout << "Parsing a page...\n";
00058                 // makes sure the first thing declared is a page
00059                 if (isFirstEntryInCode) {
00060                     isFirstEntryInCode = false;
00061                 }
00062                 
00063                 // ASSERTION: there are only two tokens on this line
00064                 PARSER_ASSERT((static_cast<int>(ruleTokens.size()) == 2),
00065                     errorMessage << lineNumber << "New page lines should only "
00066                     << "have the PAGE identifier and a number from 1 - 12.");
00067 
00068                 // ASSERTION: the second token is a numeric token
00069                 PARSER_ASSERT(ruleTokens[1]->isNumericToken(),
00070                     errorMessage << lineNumber << "The page identifier is not a number.");
00071 
00072                 // ASSERTION: the numeric token has a value of 1 - 12
00073                 unsigned int pageId = static_cast<int>(ruleTokens[1]->getNumericData());
00074                 PARSER_ASSERT((1 <= pageId && pageId <= 12),
00075                     errorMessage << lineNumber << "Page number must be 1 through 12.");
00076 
00077                 // ASSERTION: there are no rules on this page
00078                 currentPageIndex = pageId - 1;  // states the page that's currently being manipulated
00079                 PARSER_ASSERT((parsedPages[currentPageIndex]->getRuleCount() == 0),
00080                     errorMessage << lineNumber << "This page already has rules. "
00081                     << "You cannot add rules to this page again.");
00082                 
00083                 tempRule = NULL;
00084                 currPageParsing = NULL;
00085                 
00086                 // some bookkeeping
00087                 GeneralFncs::destroyAllPtrsInVector(ruleTokens);
00088 
00089                 // std::cout << "Finished parsing page declaration: PAGE " << pageId << std::endl;
00090                 // move onto the next iteration
00091                 continue;
00092             }
00093             
00094 // ===================================== Indentation Marker Check ===================================== //
00095             if (ruleTokens[0]->getKeywordData() == kIndentationMarker) {
00096               // std::cout << "Parsing an indented rule...\n";
00097                     // ASSERTION: this is not the first line in the document
00098                 PARSER_ASSERT((!isFirstEntryInCode),
00099                     errorMessage << lineNumber << "A PAGE declaration must be the "
00100                     << "first thing in the file.");
00101 
00102                 // ASSERTION: this is not the first rule on this page
00103                 PARSER_ASSERT((currPageParsing->getRuleCount() > 0),
00104                     errorMessage << lineNumber << "This is the first rule on the page. "
00105                     << "It should not be indented.");
00106 
00107                 // count the number of indentations on this line
00108                 // NOTE: starting at 1 because we already know index 0 is an indentation marker
00109                 unsigned int indentLvlCount = 1;
00110                 
00111                 // delete and erase the first instance (since we are starting at one)
00112                 // std::cout << "Erased " << indentLvlCount << " indentation(s).\n";
00113                 GeneralFncs::destroyPtrInVector(ruleTokens, 0);
00114                 
00115                 while (indentLvlCount < ruleTokens.size()) {
00116                     // ASSERTION: there is at least two more tokens on this line (WHEN and DO)
00117                     PARSER_ASSERT((ruleTokens.size() >= 2),
00118                         errorMessage << lineNumber << "This line does not seem to have the "
00119                         << "WHEN and DO identifiers.");
00120                     
00121                     // ASSERTION: the current token is a keyword token
00122                     PARSER_ASSERT(ruleTokens[0]->isKeywordToken(),
00123                         errorMessage << lineNumber << "Token #" << (indentLvlCount + 1)
00124                         << " is not valid. It should be either a '" << kIndentationMarker
00125                         << "' marker or a WHEN identifier.");
00126                     
00127                     // found the WHEN identifier!
00128                     if (ruleTokens[0]->getKeywordData() == "WHEN") {
00129                       // std::cout << "Found a WHEN keyword!\n";
00130                         break;
00131                     }
00132                     
00133                     // ASSERTION: the token is an indentation marker
00134                     PARSER_ASSERT((ruleTokens[0]->getKeywordData() == kIndentationMarker),
00135                         errorMessage << lineNumber << "Token #" << (indentLvlCount + 1)
00136                         << " should be a '" << kIndentationMarker << "' or a WHEN.");
00137 
00138                     // ASSERTION: the length of this string is one
00139                     PARSER_ASSERT((ruleTokens[0]->getKeywordData().size() == 1),
00140                         errorMessage << lineNumber << "Token #" << (indentLvlCount + 1)
00141                         << " should only have a length of " << kIndentationMarker.size() << ".");
00142 
00143                     // increase the indentation levels
00144                     indentLvlCount++;
00145 
00146                     // while searching for the WHEN identifier, delete each indentation marker
00147                     // std::cout << "Erased " << indentLvlCount << " indentation(s).\n";
00148                     GeneralFncs::destroyPtrInVector(ruleTokens, 0);
00149                 }
00150 
00151                 // get the indentation level of the previous rule
00152                 unsigned int prevRuleIndentCount =
00153                     currPageParsing->getRuleInPos(currPageParsing->getRuleCount() - 1)->
00154                     getIndentationLevel();
00155 
00156                 // ASSERTION: this line has no more than X + 1 indentation levels
00157                 //            (X = previous line indentation level)
00158                 PARSER_ASSERT((indentLvlCount <= prevRuleIndentCount + 1),
00159                     errorMessage << lineNumber << "There is(are) too many indentation(s) on this line. "
00160                     << "Max allowable is either " << prevRuleIndentCount
00161                     << " or " << (prevRuleIndentCount + 1) << " for this line. "
00162                     << "This line has " << indentLvlCount << ".");
00163 
00164                 // set indentation level 
00165                 tempRule = new ParsedRule();
00166                 tempRule->setIndentationLevel(indentLvlCount);
00167 
00168                 // set parent rule
00169                 for (int ruleIndex = (currPageParsing->getRuleCount() - 1); ruleIndex >= 0; ruleIndex--)
00170                 {
00171                     if (currPageParsing->getRuleInPos(ruleIndex)->getIndentationLevel() == indentLvlCount - 1)
00172                     {
00173                         tempRule->setParentNumber(ruleIndex + 1);
00174                         break;
00175                     }
00176                 }
00177                 // Proceed to the other case
00178                 // std::cout << "Finished handling indentations.\n";
00179             }
00180             
00181 // ======================================= When Identifier Check ====================================== //
00182             if (ruleTokens[0]->getKeywordData() == "WHEN") {
00183               // std::cout << "Parsing a rule...\n";
00184                 // ASSERTION: this is not the first line in the document
00185                 PARSER_ASSERT((!isFirstEntryInCode),
00186                     errorMessage << lineNumber <<  "A PAGE declaration must be the "
00187                     << "first thing in the document.");
00188 
00189                 // if tempRule was created before, create a new instance
00190                 if (tempRule == NULL)
00191                     tempRule = new ParsedRule();
00192 
00193                 // ASSERTION: DO is in this token list
00194                 int doPos = 0;
00195                 PARSER_ASSERT(((doPos = contains(ruleTokens, "DO")) > 0),
00196                     errorMessage << lineNumber << "Cannot find the DO identifier on this line.");
00197 
00198                 // create two vectors: one for the condition and the other for the action
00199                 std::vector<TokenBase*> conditionTokens, actionTokens;
00200                 conditionTokens = GeneralFncs::subVector(ruleTokens, 1, doPos);
00201                 actionTokens = GeneralFncs::subVector(ruleTokens, doPos + 1, ruleTokens.size());
00202 
00203                 // setup the Phrases for this rule
00204                 ParsedPhrase* condition = new ParsedPhrase();
00205                 ParsedPhrase* action = new ParsedPhrase();
00206 
00207                 // check if the condition was empty (implicit always)
00208                 if (!conditionTokens.empty()) {
00209                     condition->setPhraseHead(conditionTokens[0]);
00210                     condition->setPhraseModifiers(GeneralFncs::subVector(conditionTokens, 1,
00211                         conditionTokens.size()));
00212                 } else {
00213                     condition->setPhraseHead(new KeywordToken("always"));
00214                 }
00215 
00216                 // check if the action was empty (implicit do nothing)
00217                 if (!actionTokens.empty()) {
00218                     action->setPhraseHead(actionTokens[0]);
00219                     action->setPhraseModifiers(GeneralFncs::subVector(actionTokens, 1, actionTokens.size()));
00220                 } else {
00221                     action->setPhraseHead(new KeywordToken("do_nothing"));
00222                 }
00223 
00224                 // set rule number, condition, and action
00225                 tempRule->setRuleNumber(currPageParsing->getRuleCount() + 1);
00226                 tempRule->setConditionPhrase(condition);
00227                 tempRule->setActionPhrase(action);
00228 
00229                 // add this rule to the current page
00230                 currPageParsing->addRule(tempRule);
00231 
00232                 // NULL all pointers used (bookkeeping prevents segmentation faults from out of scope)
00233                 condition = NULL;
00234                 action = NULL;
00235                 tempRule = NULL;
00236                 currPageParsing = NULL;
00237                 
00238                 // some bookkeeping
00239                 GeneralFncs::destroyPtrInVector(ruleTokens, doPos);
00240                 GeneralFncs::destroyPtrInVector(ruleTokens, 0);
00241                 
00242                 // move on to the next iteration
00243                 // std::cout << "Finished parsing rule on line " << (index + 1) << ".\n";
00244                 continue;
00245             }
00246             
00247 // =================================== Default response if not any of the others ======================== //
00248             PARSER_ASSERT(ERROR,
00249             errorMessage << lineNumber << "(Col 1) Unknown token. "
00250             << "It does not begin with PAGE, indentation markers '"
00251             << kIndentationMarker << "', a WHEN identifier, or whitespace (tabs, spaces, etc.).");
00252         }
00253         
00254         // clear the set of Kodu keywords (they are no longer needed)
00255         clearKeywordSet();
00256         return true;
00257     }
00258     
00259     bool Parser::TokenParser::readText(std::vector<std::string>& koduStrings) {
00260         const char* fileName = "my.kode";
00261         std::cout << "============ Reading Kodu code from " << fileName << " ============\n";
00262         std::string line;
00263         std::ifstream kodufile(fileName);
00264         if (kodufile.is_open()) {
00265             while (kodufile.good()) {
00266                 getline(kodufile, line);
00267                 koduStrings.push_back(line);
00268             }
00269             kodufile.close();
00270             std::cout << "============ Reading Complete ============\n";
00271         } else {
00272             std::cerr << "Error reading the file.\n";
00273             return false;
00274         }
00275         return true;
00276     }
00277     
00278     int Parser::TokenParser::contains(const std::vector<TokenBase*>& tokens, const std::string& searchItem) {
00279         const int kSize = tokens.size();
00280         for (int i = 0; i < kSize; i++) {
00281             if (tokens[i]->isKeywordToken() && tokens[i]->getKeywordData() == searchItem)
00282                 return i;
00283         }
00284         return -1;
00285     }
00286 }