Tokenize.cc
Go to the documentation of this file.00001 
00002 #include "Kodu/Parsing/Parser.h"
00003 
00004 
00005 #include <cctype>
00006 
00007 namespace Kodu {
00008 
00009     bool Parser::TokenParser::tokenize(const std::string& kString, std::vector<TokenBase*>& tokens) {
00010         std::size_t pos = 0;
00011         const std::size_t kSize = kString.size();
00012         while (pos < kSize) {
00013             
00014             char currChar = kString[pos];
00015 
00016             
00017             PARSER_ASSERT((currChar == '"' || currChar == ' ' || isalnum(currChar) || currChar == '<'
00018                 || currChar == '>' || currChar == ':'),
00019                 errorMessage << "The character '" << currChar << "' is unrecognized.");
00020 
00021             
00022             
00023             if (currChar == '"') {
00024                 
00025                 std::size_t nextDqOccurence = kString.find('"', pos + 1);
00026 
00027                 
00028                 PARSER_ASSERT((nextDqOccurence != std::string::npos),
00029                     errorMessage << "Could not find a closing double quote (first occurrence of (\") at Col "
00030                         << (pos + 1) << ".");
00031 
00032                 
00033                 tokens.push_back(new StringToken(kString.substr(pos + 1, nextDqOccurence - pos - 1)));
00034 
00035                 
00036                 pos = nextDqOccurence + 1;
00037                 continue;
00038             }
00039 
00040             if (currChar == ':') {
00041                 
00042                 PARSER_ASSERT((pos + 1 < kSize && kString[pos + 1] == ' '),
00043                     errorMessage << "(Col " << (pos + 2) << ") The character '"
00044                     << kString[pos + 1] << "' is an invalid character.");
00045 
00046                 
00047                 tokens.push_back(new KeywordToken(":"));
00048 
00049                 
00050                 pos = pos + 2;
00051             }
00052 
00053             
00054             if (isalpha(currChar)) {
00055                 
00056                 std::size_t nextWsOccurence = kString.find(' ', pos + 1);
00057 
00058                 
00059                 std::string keyword;
00060 
00061                 
00062                 if (nextWsOccurence == std::string::npos)
00063                     keyword = kString.substr(pos);
00064                 else
00065                     keyword = kString.substr(pos, nextWsOccurence - pos);
00066 
00067                 
00068                 PARSER_ASSERT((koduKeywords.count(keyword) == 1),
00069                     errorMessage << "(Col " << (pos + 1) << ") The token \"" << keyword
00070                     << "\" is not a recognized Kodu keyword.");
00071 
00072                 
00073                 tokens.push_back(new KeywordToken(keyword));
00074 
00075                 
00076                 if (nextWsOccurence == std::string::npos)
00077                     pos = kSize;
00078                 else
00079                     pos = nextWsOccurence + 1;
00080                 continue;
00081             }
00082 
00083             
00084             if (isdigit(currChar)) {
00085                 
00086                 std::size_t tempPos = pos + 1;
00087                 std::size_t stopPos = 0;
00088                 unsigned int dotCount = 0;
00089                 bool hasOnlyZeros = true;
00090 
00091                 
00092                 std::size_t nextWsOccurence = kString.find(' ', tempPos);
00093 
00094                 
00095                 if (nextWsOccurence == std::string::npos)
00096                     stopPos = kString.size();
00097                 else
00098                     stopPos = nextWsOccurence;
00099 
00100                 
00101                 while (tempPos < stopPos) {
00102                     
00103                     PARSER_ASSERT((isdigit(kString[tempPos]) || (kString[tempPos] == '.')),
00104                         errorMessage << "(Col " << (tempPos + 1) << ") The character '"
00105                         << kString[tempPos] << "' in token \"" << kString.substr(pos, stopPos - pos)
00106                         << "\" is not a digit [0-9] or a decimal point (.).");
00107 
00108                     
00109                     if (kString[tempPos] == '.') {
00110                         dotCount++;
00111                     }
00112 
00113                     
00114                     PARSER_ASSERT((dotCount == 0 || dotCount == 1),
00115                         errorMessage << "(Col " << (tempPos + 1)
00116                         << ") There is an additional decimal point (.) in the token \""
00117                         << kString.substr(pos, stopPos - pos) << "\".");
00118 
00119                     
00120                     if (isdigit(kString[tempPos]) && kString[tempPos] != '0') {
00121                         hasOnlyZeros = false;
00122                     }
00123 
00124                     
00125                     tempPos++;
00126                 }
00127 
00128                 
00129                 PARSER_ASSERT((kString[tempPos] != '.'),
00130                     errorMessage << "(Col " << (tempPos + 1)
00131                     << ") A decimal point cannot the last character of a number.");
00132 
00133                 
00134                 if (kString[pos] == '0') {
00135                   PARSER_ASSERT( ((kString[pos + 1] == '.') || (pos+1 == tempPos)),
00136                         errorMessage << "(Col " << (pos + 2)
00137                         << ") A number can only begin with zero if the zero is followed by a decimal point.");
00138                 }
00139 
00140                 
00141                 float value = (float)strtod(kString.substr(pos, stopPos - pos).c_str(), NULL);
00142 
00143                 
00144                 
00145                 if (!hasOnlyZeros) {
00146                     
00147                     PARSER_ASSERT((value > 0.0f),
00148                         errorMessage << "(Col " << (pos + 1)
00149                         << ") There was an error converting \""
00150                         << kString.substr(pos, stopPos - pos) << "\" to a floating-point number. "
00151                         << "Numbers should only contain digits and one dot, if needed.");
00152                 }
00153 
00154                 
00155                 tokens.push_back(new NumericToken(value));
00156 
00157                 
00158                 pos = stopPos + 1;
00159                 continue;
00160             }
00161 
00162             
00163             if (currChar == '<' || currChar == '>') {
00164                 
00165                 PARSER_ASSERT((pos + 2 < kString.size()),
00166                     errorMessage << "(Col " << (pos + 1)
00167                     << ") Invalid use of the inequality signs. There must be a number after the signs.");
00168 
00169                 
00170                 
00171                 PARSER_ASSERT((kString[pos + 1] == '=' && kString[pos + 2] == ' '),
00172                     errorMessage << "(Col " << (pos + 1)
00173                     << ") Invalid use of inequality signs. Correct usage: [number] [space] <= [space] [number]."
00174                     << "(E.g. 10 <= 13).");
00175 
00176                 
00177                 if (currChar == '<')
00178                     tokens.push_back(new KeywordToken("<="));
00179                 else
00180                     tokens.push_back(new KeywordToken(">="));
00181 
00182                 
00183                 pos = pos + 3;
00184                 continue;
00185             }
00186 
00187             
00188             if (isspace(kString[pos])) {
00189                 pos++;
00190                 continue;
00191             }
00192         }
00193         return true;
00194     }
00195 }