#include "Lexer.h"
//Builds a lexer over a NUL-terminated character buffer.
//  buffer: the text to analyze; it is only stored here, not copied. It may be NULL,
//          in which case Analyze() reports NULL_BUFFER.
//  tokens: the vector that Analyze() appends the produced tokens to; only the pointer is stored.
Lexer::Lexer(char* buffer, vector<Token>* tokens)
{
    this->buffer = buffer; //save the buffer pointer
    //strlen() on a NULL pointer is undefined behaviour, so a missing buffer is recorded
    //with length 0 and left for Analyze() to report
    this->bufferLength = (buffer != NULL) ? strlen(buffer) : 0;
    this->Tokens = tokens; //save the pointer to the output vector
    //no comments have been lexed yet
    this->MultiLineFoundComments = 0L;
    this->SingleLineFoundComments = 0L;
}
short Lexer::Analyze(void)
{
//check if the buffer is null or empty and return the proper error if so
if (this->buffer == (char*)NULL)
return NULL_BUFFER;
else if (this->bufferLength == 0)
return EMPTY_BUFFER;
//what is the lexer reading?
LexReading currentlyReading;
unsigned long currentBufferCharacter = 0L; //the character that the lexer will read
unsigned long currentBufferRow = 1L; //the user will know the exact line of an error (or warning) if any
unsigned long currentBufferColumn = 1L; //the user will know the exact line of an error (or warning) if any
vector<char> temp; //a temporary vector to store the characters of identifiers, numbers, strings and chars
while (currentBufferCharacter < this->bufferLength)
{
switch (currentlyReading)
{
case MultiLineComment:
//if this is the end of the comment
if ((this->buffer[currentBufferCharacter] == '*') && (this->buffer[currentBufferCharacter + 1] == '/'))
{
currentlyReading = Source; //the lexer is going to read source code again
this->MultiLineFoundComments++; //update the number of multi line comments lexed
currentBufferCharacter++; //the lexer won't read the / character the next step
} else if (this->buffer[currentBufferCharacter] == '\n') {
currentBufferRow++; //update the row number
currentBufferColumn = 1L; //update the column number
}
//else do nothing, I don't care about comments
break;
case SingleLineComment:
//if this is the end of the line
if (this->buffer[currentBufferCharacter] == '\n')
{
currentlyReading = Source; //the lexer is going to read source code again
this->SingleLineFoundComments++; //update the number of single line comments lexed
currentBufferRow++; //update the row number
currentBufferColumn = 1L; //update the column number
}
//else do nothing, I don't care about comments
break;
case Number:
//if this is the end of the number
if (((this->buffer[currentBufferCharacter] < 48) || (this->buffer[currentBufferCharacter] > 57)) && ((this->buffer[currentBufferCharacter] != '.') && (this->buffer[currentBufferCharacter] != 'D') && (this->buffer[currentBufferCharacter] != 'X') && (this->buffer[currentBufferCharacter] != 'B') && (this->buffer[currentBufferCharacter] != 'd') && (this->buffer[currentBufferCharacter] != 'x') && (this->buffer[currentBufferCharacter] != 'b') && (this->buffer[currentBufferCharacter] != 'a')&& (this->buffer[currentBufferCharacter] != 'A') && (this->buffer[currentBufferCharacter] != 'b') && (this->buffer[currentBufferCharacter] != 'C') && (this->buffer[currentBufferCharacter] != 'c') && (this->buffer[currentBufferCharacter] != 'd') && (this->buffer[currentBufferCharacter] != 'D') && (this->buffer[currentBufferCharacter] != 'e') && (this->buffer[currentBufferCharacter] != 'E') && (this->buffer[currentBufferCharacter] != 'f') && (this->buffer[currentBufferCharacter] != 'F')))
{ //then save the number stored in temp
size_t characters = temp.size();
char* numberStringFromTemp = new char[characters + 1];
numberStringFromTemp[characters] = (char)0x00;
size_t currentChar;
for (currentChar = 0L; currentChar < characters; currentChar++)
numberStringFromTemp[currentChar] = temp[currentChar];
//create the token
Token numberToken;
numberToken.type = TNUMBER;
numberToken.row = currentBufferRow;
numberToken.column = currentBufferColumn;
numberToken.data = numberStringFromTemp;
this->Tokens->push_back(numberToken); //save the new token
temp.clear(); //clear the temp buffer after having saved the number stored in
currentlyReading = Source; //prepare the lexer for the next character
currentBufferCharacter--; //the lexer have to re-read the current character
} else { //else save the character to the temp
temp.push_back(this->buffer[currentBufferCharacter]); //store the read character
}
break;
case String:
//if this is the end of a string
if (this->buffer[currentBufferCharacter] == '"')
{ //then save the string stored in temp
size_t characters = temp.size();
char* stringFromTemp = new char[characters + 1];
stringFromTemp[characters] = (char)0x00;
size_t currentChar;
for (currentChar = 0L; currentChar < characters; currentChar++)
stringFromTemp[currentChar] = temp[currentChar];
//create the token
Token stringToken;
stringToken.type = TSTRING;
stringToken.row = currentBufferRow;
stringToken.column = currentBufferColumn;
stringToken.data = stringFromTemp;
this->Tokens->push_back(stringToken); //save the new token
temp.clear(); //clear the temp buffer after having saved the string stored in
currentlyReading = Source; //prepare the lexer for the next character
} else if (this->buffer[currentBufferCharacter] == '\n') {
temp.push_back('\\'); //store the read character
temp.push_back('n'); //as it should be stored
currentBufferRow++; //update the row number
currentBufferColumn = 1L; //update the column number
/* FUCK YOU! STUPID USER! */
} else if (this->buffer[currentBufferCharacter] == '\t') {
temp.push_back('\\'); //store the read character
temp.push_back('t'); //as it should be stored
/* FUCK YOU! STUPID USER! */
} else if (this->buffer[currentBufferCharacter] == '\b') {
temp.push_back('\\'); //store the read character
temp.push_back('b'); //as it should be stored
/* FUCK YOU! STUPID USER! */
} else if (this->buffer[currentBufferCharacter] == '\f') {
temp.push_back('\\'); //store the read character
temp.push_back('f'); //as it should be stored
/* FUCK YOU! STUPID USER! */
} else if (this->buffer[currentBufferCharacter] == '\v') {
temp.push_back('\\'); //store the read character
temp.push_back('v'); //as it should be stored
/* FUCK YOU! STUPID USER! */
} else if (this->buffer[currentBufferCharacter] == '\r') {
temp.push_back('\\'); //store the read character
temp.push_back('r'); //as it should be stored
/* FUCK YOU! STUPID USER! */
} else if (this->buffer[currentBufferCharacter] == '\?') {
temp.push_back('\\'); //store the read character
temp.push_back('?'); //as it should be stored
/* FUCK YOU! STUPID USER! */
} else { //else save the character to the temp
temp.push_back(this->buffer[currentBufferCharacter]); //store the read character
}
break;
case Identifier:
//if this is a character that is part of an identifier
if (((this->buffer[currentBufferCharacter] >= 65) && (this->buffer[currentBufferCharacter] <= 90)) || ((this->buffer[currentBufferCharacter] >= 97) && (this->buffer[currentBufferCharacter] <= 122)) || (this->buffer[currentBufferCharacter] == '_'))
{
temp.push_back(this->buffer[currentBufferCharacter]); //store the read character
} else { //save the string stored in temp
size_t characters = temp.size();
char* identifierStringFromTemp = new char[characters + 1];
identifierStringFromTemp[characters] = (char)0x00;
size_t currentChar;
for (currentChar = 0L; currentChar < characters; currentChar++)
identifierStringFromTemp[currentChar] = temp[currentChar];
//create the token
Token identifierToken;
identifierToken.type = TIDENTIFIER;
identifierToken.row = currentBufferRow;
identifierToken.column = currentBufferColumn;
identifierToken.data = identifierStringFromTemp;
this->Tokens->push_back(identifierToken); //save the new token
temp.clear(); //clear the temp buffer after having saved the identifier stored in
currentlyReading = Source; //prepare the lexer for the next character
currentBufferCharacter--; //the lexer have to re-read the current character
}
break;
case Source:
if (this->buffer[currentBufferCharacter] == '\n')
{
currentBufferRow++; //update the row number
currentBufferColumn = 1L; //update the column number
}
else if (this->buffer[currentBufferCharacter] == '#') //single line comments aren't C-like
{
currentlyReading = SingleLineComment; //the lexer is going to read a single line comment
}
else if ((this->buffer[currentBufferCharacter] == '/') && (this->buffer[currentBufferCharacter + 1] == '*')) //multi line comments are C-like
{
currentlyReading = MultiLineComment; //the lexer is going to read a multi line comment
currentBufferCharacter++; //the lexer won't read the * simbol
}
else if (this->buffer[currentBufferCharacter] == '"') //start of a string
{
currentlyReading = String; //the lexer is going to read a string
}
else if ((this->buffer[currentBufferCharacter] >= 48) && (this->buffer[currentBufferCharacter] <= 57)) // ASCII code of 0 is 48 and of 9 is 57
{
currentlyReading = Number; //the lexer is going to read a number
currentBufferCharacter--; //a little trick: i want the lexer to read this character again (when the lexer will expect a number)
}
else if (((this->buffer[currentBufferCharacter] >= 65) && (this->buffer[currentBufferCharacter] <= 90)) || ((this->buffer[currentBufferCharacter] >= 97) && (this->buffer[currentBufferCharacter] <= 122)) || (this->buffer[currentBufferCharacter] == '_'))
{
currentlyReading = Identifier; //the lexer is going to read an identifier
currentBufferCharacter--; //the same little trick: i want the lexer to read this character again (when the lexer will expect an identifier)
}
else if (this->buffer[currentBufferCharacter] == ';')
{
Token newToken;
newToken.type = TDOTCOMMA;
newToken.row = currentBufferRow;
newToken.column = currentBufferColumn;
newToken.data = (char*)NULL;
this->Tokens->push_back(newToken);
}
else if (this->buffer[currentBufferCharacter] == '.')
{
Token newToken;
newToken.type = TDOT;
newToken.row = currentBufferRow;
newToken.column = currentBufferColumn;
newToken.data = (char*)NULL;
this->Tokens->push_back(newToken);
}
else if (this->buffer[currentBufferCharacter] == ',')
{
Token newToken;
newToken.type = TCOMMA;
newToken.row = currentBufferRow;
newToken.column = currentBufferColumn;
newToken.data = (char*)NULL;
this->Tokens->push_back(newToken);
}
else if (this->buffer[currentBufferCharacter] == '{')
{
Token newToken;
newToken.type = TLBRACE;
newToken.row = currentBufferRow;
newToken.column = currentBufferColumn;
newToken.data = (char*)NULL;
this->Tokens->push_back(newToken);
}
else if (this->buffer[currentBufferCharacter] == '}')
{
Token newToken;
newToken.type = TRBRACE;
newToken.row = currentBufferRow;
newToken.column = currentBufferColumn;
newToken.data = (char*)NULL;
this->Tokens->push_back(newToken);
}
else if (this->buffer[currentBufferCharacter] == '(')
{
Token newToken;
newToken.type = TLPAREN;
newToken.row = currentBufferRow;
newToken.column = currentBufferColumn;
newToken.data = (char*)NULL;
this->Tokens->push_back(newToken);
}
else if (this->buffer[currentBufferCharacter] == ')')
{
Token newToken;
newToken.type = TRPAREN;
newToken.row = currentBufferRow;
newToken.column = currentBufferColumn;
newToken.data = (char*)NULL;
this->Tokens->push_back(newToken);
}
else if (this->buffer[currentBufferCharacter] == '[')
{
Token newToken;
newToken.type = TLSQRPAREN;
newToken.row = currentBufferRow;
newToken.column = currentBufferColumn;
newToken.data = (char*)NULL;
this->Tokens->push_back(newToken);
}
else if (this->buffer[currentBufferCharacter] == ']')
{
Token newToken;
newToken.type = TRSQRPAREN;
newToken.row = currentBufferRow;
newToken.column = currentBufferColumn;
newToken.data = (char*)NULL;
this->Tokens->push_back(newToken);
}
break;
default:
break;
}
currentBufferColumn++; //update the number of the character in the current line
currentBufferCharacter++; //next time I'll read the next character
}
//check if the lexer reached the end unexpectly
if ((currentlyReading != Source) && (currentlyReading != SingleLineComment))
{
if (currentlyReading == Identifier)
{
size_t characters = temp.size();
char* identifierStringFromTemp = new char[characters + 1];
identifierStringFromTemp[characters] = (char)0x00;
size_t currentChar;
for (currentChar = 0L; currentChar < characters; currentChar++)
identifierStringFromTemp[currentChar] = temp[currentChar];
//create the token
Token identifierToken;
identifierToken.type = TIDENTIFIER;
identifierToken.row = currentBufferRow;
identifierToken.column = currentBufferColumn;
identifierToken.data = identifierStringFromTemp;
this->Tokens->push_back(identifierToken); //save the new token
/* FUCK YOU! STUPID USER! */
} else if (currentlyReading == Number) {
size_t characters = temp.size();
char* numberStringFromTemp = new char [characters + 1];
numberStringFromTemp[characters] = (char)0x00;
size_t currentChar;
for (currentChar = 0L; currentChar < characters; currentChar++)
numberStringFromTemp[currentChar] = temp[currentChar];
//create the token
Token numberToken;
numberToken.type = TNUMBER;
numberToken.row = currentBufferRow;
numberToken.column = currentBufferColumn;
numberToken.data = numberStringFromTemp;
this->Tokens->push_back(numberToken); //save the new token
/* FUCK YOU! STUPID USER! */
} else if (currentlyReading == String) {
//return an error
} else { //MultiLineComment
//return an error
}
}
//the lexer's job is done
return NO_ERRORS;
}