sqlexer.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
/*
see copyright notice in squirrel.h
*/
  4. #include "sqpcheader.h"
  5. #include <ctype.h>
  6. #include <stdlib.h>
  7. #include "sqtable.h"
  8. #include "sqstring.h"
  9. #include "sqcompiler.h"
  10. #include "sqlexer.h"
  11. #define CUR_CHAR (_currdata)
  12. #define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t;}
  13. #define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
  14. #define NEXT() {Next();_currentcolumn++;}
  15. #define INIT_TEMP_STRING() { _longstr.resize(0);}
  16. #define APPEND_CHAR(c) { _longstr.push_back(c);}
  17. #define TERMINATE_BUFFER() {_longstr.push_back(_SC('\0'));}
  18. #define ADD_KEYWORD(key,id) _keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
  19. SQLexer::SQLexer(){}
  20. SQLexer::~SQLexer()
  21. {
  22. _keywords->Release();
  23. }
  24. void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
  25. {
  26. _errfunc = efunc;
  27. _errtarget = ed;
  28. _sharedstate = ss;
  29. _keywords = SQTable::Create(ss, 37);
  30. ADD_KEYWORD(while, TK_WHILE);
  31. ADD_KEYWORD(do, TK_DO);
  32. ADD_KEYWORD(if, TK_IF);
  33. ADD_KEYWORD(else, TK_ELSE);
  34. ADD_KEYWORD(break, TK_BREAK);
  35. ADD_KEYWORD(continue, TK_CONTINUE);
  36. ADD_KEYWORD(return, TK_RETURN);
  37. ADD_KEYWORD(null, TK_NULL);
  38. ADD_KEYWORD(function, TK_FUNCTION);
  39. ADD_KEYWORD(local, TK_LOCAL);
  40. ADD_KEYWORD(for, TK_FOR);
  41. ADD_KEYWORD(foreach, TK_FOREACH);
  42. ADD_KEYWORD(in, TK_IN);
  43. ADD_KEYWORD(typeof, TK_TYPEOF);
  44. ADD_KEYWORD(base, TK_BASE);
  45. ADD_KEYWORD(delete, TK_DELETE);
  46. ADD_KEYWORD(try, TK_TRY);
  47. ADD_KEYWORD(catch, TK_CATCH);
  48. ADD_KEYWORD(throw, TK_THROW);
  49. ADD_KEYWORD(clone, TK_CLONE);
  50. ADD_KEYWORD(yield, TK_YIELD);
  51. ADD_KEYWORD(resume, TK_RESUME);
  52. ADD_KEYWORD(switch, TK_SWITCH);
  53. ADD_KEYWORD(case, TK_CASE);
  54. ADD_KEYWORD(default, TK_DEFAULT);
  55. ADD_KEYWORD(this, TK_THIS);
  56. ADD_KEYWORD(class,TK_CLASS);
  57. ADD_KEYWORD(extends,TK_EXTENDS);
  58. ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
  59. ADD_KEYWORD(instanceof,TK_INSTANCEOF);
  60. ADD_KEYWORD(true,TK_TRUE);
  61. ADD_KEYWORD(false,TK_FALSE);
  62. ADD_KEYWORD(static,TK_STATIC);
  63. ADD_KEYWORD(enum,TK_ENUM);
  64. ADD_KEYWORD(const,TK_CONST);
  65. ADD_KEYWORD(__LINE__,TK___LINE__);
  66. ADD_KEYWORD(__FILE__,TK___FILE__);
  67. _readf = rg;
  68. _up = up;
  69. _lasttokenline = _currentline = 1;
  70. _currentcolumn = 0;
  71. _prevtoken = -1;
  72. _reached_eof = SQFalse;
  73. Next();
  74. }
  75. void SQLexer::Error(const SQChar *err)
  76. {
  77. _errfunc(_errtarget,err);
  78. }
  79. void SQLexer::Next()
  80. {
  81. SQInteger t = _readf(_up);
  82. if(t > MAX_CHAR) Error(_SC("Invalid character"));
  83. if(t != 0) {
  84. _currdata = (LexChar)t;
  85. return;
  86. }
  87. _currdata = SQUIRREL_EOB;
  88. _reached_eof = SQTrue;
  89. }
  90. const SQChar *SQLexer::Tok2Str(SQInteger tok)
  91. {
  92. SQObjectPtr itr, key, val;
  93. SQInteger nitr;
  94. while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
  95. itr = (SQInteger)nitr;
  96. if(((SQInteger)_integer(val)) == tok)
  97. return _stringval(key);
  98. }
  99. return NULL;
  100. }
  101. void SQLexer::LexBlockComment()
  102. {
  103. bool done = false;
  104. while(!done) {
  105. switch(CUR_CHAR) {
  106. case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
  107. case _SC('\n'): _currentline++; NEXT(); continue;
  108. case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
  109. default: NEXT();
  110. }
  111. }
  112. }
  113. void SQLexer::LexLineComment()
  114. {
  115. do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
  116. }
  117. SQInteger SQLexer::Lex()
  118. {
  119. _lasttokenline = _currentline;
  120. while(CUR_CHAR != SQUIRREL_EOB) {
  121. switch(CUR_CHAR){
  122. case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
  123. case _SC('\n'):
  124. _currentline++;
  125. _prevtoken=_curtoken;
  126. _curtoken=_SC('\n');
  127. NEXT();
  128. _currentcolumn=1;
  129. continue;
  130. case _SC('#'): LexLineComment(); continue;
  131. case _SC('/'):
  132. NEXT();
  133. switch(CUR_CHAR){
  134. case _SC('*'):
  135. NEXT();
  136. LexBlockComment();
  137. continue;
  138. case _SC('/'):
  139. LexLineComment();
  140. continue;
  141. case _SC('='):
  142. NEXT();
  143. RETURN_TOKEN(TK_DIVEQ);
  144. continue;
  145. case _SC('>'):
  146. NEXT();
  147. RETURN_TOKEN(TK_ATTR_CLOSE);
  148. continue;
  149. default:
  150. RETURN_TOKEN('/');
  151. }
  152. case _SC('='):
  153. NEXT();
  154. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
  155. else { NEXT(); RETURN_TOKEN(TK_EQ); }
  156. case _SC('<'):
  157. NEXT();
  158. switch(CUR_CHAR) {
  159. case _SC('='):
  160. NEXT();
  161. if(CUR_CHAR == _SC('>')) {
  162. NEXT();
  163. RETURN_TOKEN(TK_3WAYSCMP);
  164. }
  165. RETURN_TOKEN(TK_LE)
  166. break;
  167. case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
  168. case _SC('<'): NEXT(); RETURN_TOKEN(TK_SHIFTL); break;
  169. case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
  170. }
  171. RETURN_TOKEN('<');
  172. case _SC('>'):
  173. NEXT();
  174. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
  175. else if(CUR_CHAR == _SC('>')){
  176. NEXT();
  177. if(CUR_CHAR == _SC('>')){
  178. NEXT();
  179. RETURN_TOKEN(TK_USHIFTR);
  180. }
  181. RETURN_TOKEN(TK_SHIFTR);
  182. }
  183. else { RETURN_TOKEN('>') }
  184. case _SC('!'):
  185. NEXT();
  186. if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
  187. else { NEXT(); RETURN_TOKEN(TK_NE); }
  188. case _SC('@'): {
  189. SQInteger stype;
  190. NEXT();
  191. if(CUR_CHAR != _SC('"')) {
  192. RETURN_TOKEN('@');
  193. }
  194. if((stype=ReadString('"',true))!=-1) {
  195. RETURN_TOKEN(stype);
  196. }
  197. Error(_SC("error parsing the string"));
  198. }
  199. case _SC('"'):
  200. case _SC('\''): {
  201. SQInteger stype;
  202. if((stype=ReadString(CUR_CHAR,false))!=-1){
  203. RETURN_TOKEN(stype);
  204. }
  205. Error(_SC("error parsing the string"));
  206. }
  207. case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
  208. case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
  209. {SQInteger ret = CUR_CHAR;
  210. NEXT(); RETURN_TOKEN(ret); }
  211. case _SC('.'):
  212. NEXT();
  213. if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
  214. NEXT();
  215. if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
  216. NEXT();
  217. RETURN_TOKEN(TK_VARPARAMS);
  218. case _SC('&'):
  219. NEXT();
  220. if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
  221. else { NEXT(); RETURN_TOKEN(TK_AND); }
  222. case _SC('|'):
  223. NEXT();
  224. if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
  225. else { NEXT(); RETURN_TOKEN(TK_OR); }
  226. case _SC(':'):
  227. NEXT();
  228. if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
  229. else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
  230. case _SC('*'):
  231. NEXT();
  232. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
  233. else RETURN_TOKEN('*');
  234. case _SC('%'):
  235. NEXT();
  236. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
  237. else RETURN_TOKEN('%');
  238. case _SC('-'):
  239. NEXT();
  240. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
  241. else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
  242. else RETURN_TOKEN('-');
  243. case _SC('+'):
  244. NEXT();
  245. if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
  246. else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
  247. else RETURN_TOKEN('+');
  248. case SQUIRREL_EOB:
  249. return 0;
  250. default:{
  251. if (scisdigit(CUR_CHAR)) {
  252. SQInteger ret = ReadNumber();
  253. RETURN_TOKEN(ret);
  254. }
  255. else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
  256. SQInteger t = ReadID();
  257. RETURN_TOKEN(t);
  258. }
  259. else {
  260. SQInteger c = CUR_CHAR;
  261. if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
  262. NEXT();
  263. RETURN_TOKEN(c);
  264. }
  265. RETURN_TOKEN(0);
  266. }
  267. }
  268. }
  269. return 0;
  270. }
  271. SQInteger SQLexer::GetIDType(const SQChar *s,SQInteger len)
  272. {
  273. SQObjectPtr t;
  274. if(_keywords->GetStr(s,len, t)) {
  275. return SQInteger(_integer(t));
  276. }
  277. return TK_IDENTIFIER;
  278. }
  279. SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
  280. {
  281. INIT_TEMP_STRING();
  282. NEXT();
  283. if(IS_EOB()) return -1;
  284. for(;;) {
  285. while(CUR_CHAR != ndelim) {
  286. switch(CUR_CHAR) {
  287. case SQUIRREL_EOB:
  288. Error(_SC("unfinished string"));
  289. return -1;
  290. case _SC('\n'):
  291. if(!verbatim) Error(_SC("newline in a constant"));
  292. APPEND_CHAR(CUR_CHAR); NEXT();
  293. _currentline++;
  294. break;
  295. case _SC('\\'):
  296. if(verbatim) {
  297. APPEND_CHAR('\\'); NEXT();
  298. }
  299. else {
  300. NEXT();
  301. switch(CUR_CHAR) {
  302. case _SC('x'): NEXT(); {
  303. if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
  304. const SQInteger maxdigits = 4;
  305. SQChar temp[maxdigits+1];
  306. SQInteger n = 0;
  307. while(isxdigit(CUR_CHAR) && n < maxdigits) {
  308. temp[n] = CUR_CHAR;
  309. n++;
  310. NEXT();
  311. }
  312. temp[n] = 0;
  313. SQChar *sTemp;
  314. APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
  315. }
  316. break;
  317. case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
  318. case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
  319. case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
  320. case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
  321. case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
  322. case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
  323. case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
  324. case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
  325. case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
  326. case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
  327. case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
  328. default:
  329. Error(_SC("unrecognised escaper char"));
  330. break;
  331. }
  332. }
  333. break;
  334. default:
  335. APPEND_CHAR(CUR_CHAR);
  336. NEXT();
  337. }
  338. }
  339. NEXT();
  340. if(verbatim && CUR_CHAR == '"') { //double quotation
  341. APPEND_CHAR(CUR_CHAR);
  342. NEXT();
  343. }
  344. else {
  345. break;
  346. }
  347. }
  348. TERMINATE_BUFFER();
  349. SQInteger len = _longstr.size()-1;
  350. if(ndelim == _SC('\'')) {
  351. if(len == 0) Error(_SC("empty constant"));
  352. if(len > 1) Error(_SC("constant too long"));
  353. _nvalue = _longstr[0];
  354. return TK_INTEGER;
  355. }
  356. _svalue = &_longstr[0];
  357. return TK_STRING_LITERAL;
  358. }
  359. void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
  360. {
  361. *res = 0;
  362. while(*s != 0)
  363. {
  364. if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
  365. else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
  366. else { assert(0); }
  367. }
  368. }
  369. void LexInteger(const SQChar *s,SQUnsignedInteger *res)
  370. {
  371. *res = 0;
  372. while(*s != 0)
  373. {
  374. *res = (*res)*10+((*s++)-'0');
  375. }
  376. }
  377. SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
  378. void LexOctal(const SQChar *s,SQUnsignedInteger *res)
  379. {
  380. *res = 0;
  381. while(*s != 0)
  382. {
  383. if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
  384. else { assert(0); }
  385. }
  386. }
  387. SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
  388. #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
  389. SQInteger SQLexer::ReadNumber()
  390. {
  391. #define TINT 1
  392. #define TFLOAT 2
  393. #define THEX 3
  394. #define TSCIENTIFIC 4
  395. #define TOCTAL 5
  396. SQInteger type = TINT, firstchar = CUR_CHAR;
  397. SQChar *sTemp;
  398. INIT_TEMP_STRING();
  399. NEXT();
  400. if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
  401. if(scisodigit(CUR_CHAR)) {
  402. type = TOCTAL;
  403. while(scisodigit(CUR_CHAR)) {
  404. APPEND_CHAR(CUR_CHAR);
  405. NEXT();
  406. }
  407. if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
  408. }
  409. else {
  410. NEXT();
  411. type = THEX;
  412. while(isxdigit(CUR_CHAR)) {
  413. APPEND_CHAR(CUR_CHAR);
  414. NEXT();
  415. }
  416. if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
  417. }
  418. }
  419. else {
  420. APPEND_CHAR((int)firstchar);
  421. while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
  422. if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
  423. if(isexponent(CUR_CHAR)) {
  424. if(type != TFLOAT) Error(_SC("invalid numeric format"));
  425. type = TSCIENTIFIC;
  426. APPEND_CHAR(CUR_CHAR);
  427. NEXT();
  428. if(CUR_CHAR == '+' || CUR_CHAR == '-'){
  429. APPEND_CHAR(CUR_CHAR);
  430. NEXT();
  431. }
  432. if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
  433. }
  434. APPEND_CHAR(CUR_CHAR);
  435. NEXT();
  436. }
  437. }
  438. TERMINATE_BUFFER();
  439. switch(type) {
  440. case TSCIENTIFIC:
  441. case TFLOAT:
  442. _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
  443. return TK_FLOAT;
  444. case TINT:
  445. LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  446. return TK_INTEGER;
  447. case THEX:
  448. LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  449. return TK_INTEGER;
  450. case TOCTAL:
  451. LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
  452. return TK_INTEGER;
  453. }
  454. return 0;
  455. }
  456. SQInteger SQLexer::ReadID()
  457. {
  458. SQInteger res;
  459. INIT_TEMP_STRING();
  460. do {
  461. APPEND_CHAR(CUR_CHAR);
  462. NEXT();
  463. } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
  464. TERMINATE_BUFFER();
  465. res = GetIDType(&_longstr[0],_longstr.size() - 1);
  466. if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
  467. _svalue = &_longstr[0];
  468. }
  469. return res;
  470. }