Tokenize.cc
Go to the documentation of this file.00001
00002 #include "Kodu/Parsing/Parser.h"
00003
00004
00005 #include <cctype>
00006
00007 namespace Kodu {
00008
00009 bool Parser::TokenParser::tokenize(const std::string& kString, std::vector<TokenBase*>& tokens) {
00010 std::size_t pos = 0;
00011 const std::size_t kSize = kString.size();
00012 while (pos < kSize) {
00013
00014 char currChar = kString[pos];
00015
00016
00017 PARSER_ASSERT((currChar == '"' || currChar == ' ' || isalnum(currChar) || currChar == '<'
00018 || currChar == '>' || currChar == ':'),
00019 errorMessage << "The character '" << currChar << "' is unrecognized.");
00020
00021
00022
00023 if (currChar == '"') {
00024
00025 std::size_t nextDqOccurence = kString.find('"', pos + 1);
00026
00027
00028 PARSER_ASSERT((nextDqOccurence != std::string::npos),
00029 errorMessage << "Could not find a closing double quote (first occurrence of (\") at Col "
00030 << (pos + 1) << ".");
00031
00032
00033 tokens.push_back(new StringToken(kString.substr(pos + 1, nextDqOccurence - pos - 1)));
00034
00035
00036 pos = nextDqOccurence + 1;
00037 continue;
00038 }
00039
00040 if (currChar == ':') {
00041
00042 PARSER_ASSERT((pos + 1 < kSize && kString[pos + 1] == ' '),
00043 errorMessage << "(Col " << (pos + 2) << ") The character '"
00044 << kString[pos + 1] << "' is an invalid character.");
00045
00046
00047 tokens.push_back(new KeywordToken(":"));
00048
00049
00050 pos = pos + 2;
00051 }
00052
00053
00054 if (isalpha(currChar)) {
00055
00056 std::size_t nextWsOccurence = kString.find(' ', pos + 1);
00057
00058
00059 std::string keyword;
00060
00061
00062 if (nextWsOccurence == std::string::npos)
00063 keyword = kString.substr(pos);
00064 else
00065 keyword = kString.substr(pos, nextWsOccurence - pos);
00066
00067
00068 PARSER_ASSERT((koduKeywords.count(keyword) == 1),
00069 errorMessage << "(Col " << (pos + 1) << ") The token \"" << keyword
00070 << "\" is not a recognized Kodu keyword.");
00071
00072
00073 tokens.push_back(new KeywordToken(keyword));
00074
00075
00076 if (nextWsOccurence == std::string::npos)
00077 pos = kSize;
00078 else
00079 pos = nextWsOccurence + 1;
00080 continue;
00081 }
00082
00083
00084 if (isdigit(currChar)) {
00085
00086 std::size_t tempPos = pos + 1;
00087 std::size_t stopPos = 0;
00088 unsigned int dotCount = 0;
00089 bool hasOnlyZeros = true;
00090
00091
00092 std::size_t nextWsOccurence = kString.find(' ', tempPos);
00093
00094
00095 if (nextWsOccurence == std::string::npos)
00096 stopPos = kString.size();
00097 else
00098 stopPos = nextWsOccurence;
00099
00100
00101 while (tempPos < stopPos) {
00102
00103 PARSER_ASSERT((isdigit(kString[tempPos]) || (kString[tempPos] == '.')),
00104 errorMessage << "(Col " << (tempPos + 1) << ") The character '"
00105 << kString[tempPos] << "' in token \"" << kString.substr(pos, stopPos - pos)
00106 << "\" is not a digit [0-9] or a decimal point (.).");
00107
00108
00109 if (kString[tempPos] == '.') {
00110 dotCount++;
00111 }
00112
00113
00114 PARSER_ASSERT((dotCount == 0 || dotCount == 1),
00115 errorMessage << "(Col " << (tempPos + 1)
00116 << ") There is an additional decimal point (.) in the token \""
00117 << kString.substr(pos, stopPos - pos) << "\".");
00118
00119
00120 if (isdigit(kString[tempPos]) && kString[tempPos] != '0') {
00121 hasOnlyZeros = false;
00122 }
00123
00124
00125 tempPos++;
00126 }
00127
00128
00129 PARSER_ASSERT((kString[tempPos] != '.'),
00130 errorMessage << "(Col " << (tempPos + 1)
00131 << ") A decimal point cannot the last character of a number.");
00132
00133
00134 if (kString[pos] == '0') {
00135 PARSER_ASSERT( ((kString[pos + 1] == '.') || (pos+1 == tempPos)),
00136 errorMessage << "(Col " << (pos + 2)
00137 << ") A number can only begin with zero if the zero is followed by a decimal point.");
00138 }
00139
00140
00141 float value = (float)strtod(kString.substr(pos, stopPos - pos).c_str(), NULL);
00142
00143
00144
00145 if (!hasOnlyZeros) {
00146
00147 PARSER_ASSERT((value > 0.0f),
00148 errorMessage << "(Col " << (pos + 1)
00149 << ") There was an error converting \""
00150 << kString.substr(pos, stopPos - pos) << "\" to a floating-point number. "
00151 << "Numbers should only contain digits and one dot, if needed.");
00152 }
00153
00154
00155 tokens.push_back(new NumericToken(value));
00156
00157
00158 pos = stopPos + 1;
00159 continue;
00160 }
00161
00162
00163 if (currChar == '<' || currChar == '>') {
00164
00165 PARSER_ASSERT((pos + 2 < kString.size()),
00166 errorMessage << "(Col " << (pos + 1)
00167 << ") Invalid use of the inequality signs. There must be a number after the signs.");
00168
00169
00170
00171 PARSER_ASSERT((kString[pos + 1] == '=' && kString[pos + 2] == ' '),
00172 errorMessage << "(Col " << (pos + 1)
00173 << ") Invalid use of inequality signs. Correct usage: [number] [space] <= [space] [number]."
00174 << "(E.g. 10 <= 13).");
00175
00176
00177 if (currChar == '<')
00178 tokens.push_back(new KeywordToken("<="));
00179 else
00180 tokens.push_back(new KeywordToken(">="));
00181
00182
00183 pos = pos + 3;
00184 continue;
00185 }
00186
00187
00188 if (isspace(kString[pos])) {
00189 pos++;
00190 continue;
00191 }
00192 }
00193 return true;
00194 }
00195 }