#include "Lexer.h"
// Creates a lexer over `buffer`.
//
// The pointer is stored as-is (the text is not copied), and its length is
// measured once with strlen, so a non-null buffer must be NUL-terminated.
// A null buffer is accepted here and recorded with length 0; Analyze() then
// reports it as NULL_BUFFER instead of this constructor crashing.
// Both comment counters start at zero.
Lexer::Lexer(char* buffer)
{
    this->buffer = buffer;
    // strlen(NULL) is undefined behaviour, so only measure a real buffer.
    this->bufferLength = (buffer != (char*)NULL) ? strlen(buffer) : 0;
    this->MultiLineFoundComments = 0L;
    this->SingleLineFoundComments = 0L;
}
short Lexer::Analyze(void)
{
if (this->buffer == (char*)NULL)
return NULL_BUFFER;
else if (this->bufferLength == 0)
return EMPTY_BUFFER;
//what is the lexer reading?
LexReading currentlyReading;
unsigned long currentBufferCharacter = 0L; //the character that the leer will read
vector<char> temp; //a temporary vector to store the characters of identifiers, numbers, strings and chars
while (currentBufferCharacter < (this->bufferLength - 1))
{
switch (currentlyReading)
{
case MultiLineComment:
//if this is the end of the comment
if ((this->buffer[currentBufferCharacter] == '*') && (this->buffer[currentBufferCharacter + 1] == '/'))
{
currentlyReading = Source; //the lexer is going to read source code again
this->MultiLineFoundComments++; //update the number of multi line comments lexed
currentBufferCharacter++; //the lexer won't read the / character the next step
}
//else do nothing, I don't care about comments
break;
case SingleLineComment:
//if this is the end of the line
if (this->buffer[currentBufferCharacter] == '\n')
{
currentlyReading = Source; //the lexer is going to read source code again
this->SingleLineFoundComments++; //update the number of single line comments lexed
} //else do nothing, I don't care about comments
break;
case Number:
if (((this->buffer[currentBufferCharacter] < 48) || (this->buffer[currentBufferCharacter] > 57)) && ((this->buffer[currentBufferCharacter] != '.') && (this->buffer[currentBufferCharacter] != 'D') && (this->buffer[currentBufferCharacter] != 'X') && (this->buffer[currentBufferCharacter] != 'B') && (this->buffer[currentBufferCharacter] != 'd') && (this->buffer[currentBufferCharacter] != 'x') && (this->buffer[currentBufferCharacter] != 'b') && (this->buffer[currentBufferCharacter] != 'a')&& (this->buffer[currentBufferCharacter] != 'A') && (this->buffer[currentBufferCharacter] != 'b') && (this->buffer[currentBufferCharacter] != 'C') && (this->buffer[currentBufferCharacter] != 'c') && (this->buffer[currentBufferCharacter] != 'd') && (this->buffer[currentBufferCharacter] != 'D') && (this->buffer[currentBufferCharacter] != 'e') && (this->buffer[currentBufferCharacter] != 'E') && (this->buffer[currentBufferCharacter] != 'f') && (this->buffer[currentBufferCharacter] != 'F')))
{
size_t characters = temp.size();
char* numberStringFromTemp = (char*)malloc((unsigned long long int)(sizeof(char) * characters + 1));
numberStringFromTemp[characters - 1] = (char)0x00;
numberStringFromTemp[characters] = (char)0x00;
size_t currentChar;
for (currentChar = 0L; currentChar < characters; currentChar++)
numberStringFromTemp[currentChar] = temp[currentChar];
this->Numbers.push_back(numberStringFromTemp);
this->Tokens.push_back(TNUMBER);
temp.clear();
currentlyReading = Source;
} else {
temp.push_back(this->buffer[currentBufferCharacter]); //store the read character
}
break;
case Source:
//if this is the beginning of a single line comment
if (this->buffer[currentBufferCharacter] == '#') //single line comments aren't C-like
{
currentlyReading = SingleLineComment; //the lexer is going to read a single line comment
}
else if ((this->buffer[currentBufferCharacter] == '/') && (this->buffer[currentBufferCharacter + 1] == '*')) //multi line comments are C-like
{
currentlyReading = MultiLineComment; //the lexer is going to read a multi line comment
currentBufferCharacter++; //the lexer won't read the * simbol
}
else if ((this->buffer[currentBufferCharacter] >= 48) && (this->buffer[currentBufferCharacter] <= 57)) // ASCII code of 0 is 48 and of 9 is 57
{
currentlyReading = Number; //the lexer is going to read a number
currentBufferCharacter--; //a little trick: i want the lexer to read this character again (when the lexer will expect a number)
}
break;
default:
break;
}
currentBufferCharacter++; //next time I'll read the next character
}
//the lexer's job is done
return NO_ERRORS;
}