Преглед на файлове

added \u and \U escape sequence for UTF8,UTF16 or UCS4 characters
the escape character \x is based on sizeof(SQChar)

albertodemichelis преди 10 години
родител
ревизия
737ea798a8
променени са 4 файла, в които са добавени 97 реда и са изтрити 17 реда
  1. 2 0
      HISTORY
  2. 3 3
      sqstdlib/sqstdrex.cpp
  3. 84 14
      squirrel/sqlexer.cpp
  4. 8 0
      squirrel/sqlexer.h

+ 2 - 0
HISTORY

@@ -5,6 +5,8 @@
 -added sq_getreleasehook
 -added thread.wakeupthrow()
 -added sq_pushthread
+-added \u and \U escape sequences for UTF8, UTF16 or UCS4 characters
+-the \x escape sequence now reads up to sizeof(SQChar)*2 hex digits (previously at most 4)
 -fixed optimizer bug in compound arith operators(+=,-= etc...)
 -fixed sq_getrefvmcount() (thx Gerrit)
 -fixed sq_getrefcount() when no references were added with sq_addref() (thx Gerrit)

+ 3 - 3
sqstdlib/sqstdrex.cpp

@@ -523,10 +523,10 @@ static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar
         return NULL;
     case OP_MB:
         {
-            int cb = node->left; //char that opens a balanced expression
+            SQInteger cb = node->left; //char that opens a balanced expression
             if(*str != cb) return NULL; // string doesnt start with open char
-            int ce = node->right; //char that closes a balanced expression
-            int cont = 1;
+            SQInteger ce = node->right; //char that closes a balanced expression
+            SQInteger cont = 1;
             const SQChar *streol = exp->_eol;
             while (++str < streol) {
               if (*str == ce) {

+ 84 - 14
squirrel/sqlexer.cpp

@@ -287,6 +287,65 @@ SQInteger SQLexer::GetIDType(const SQChar *s,SQInteger len)
     return TK_IDENTIFIER;
 }
 
+#ifdef SQUNICODE
+#if WCHAR_SIZE == 2
+// Appends the code point `ch` to the string being built, encoded as UTF-16.
+//   - ch <  0x10000              : one unit, the value itself
+//   - 0x10000 <= ch <= 0x10FFFF  : two units, a high/low surrogate pair
+//   - ch >  0x10FFFF             : not representable in UTF-16; nothing is
+//                                  appended and 0 is returned, matching what
+//                                  AddUTF8 does for the same input
+// Returns the number of SQChar units appended.
+SQInteger SQLexer::AddUTF16(SQUnsignedInteger ch)
+{
+    if (ch >= 0x110000)
+    {
+        // (ch - 0x10000) >> 10 would no longer fit in the 10 payload bits of
+        // the high surrogate, so the emitted pair would be malformed.
+        return 0;
+    }
+    if (ch >= 0x10000)
+    {
+        // Split the 20-bit offset into the upper and lower 10 bits.
+        SQUnsignedInteger code = (ch - 0x10000);
+        APPEND_CHAR((SQChar)(0xD800 | (code >> 10)));
+        APPEND_CHAR((SQChar)(0xDC00 | (code & 0x3FF)));
+        return 2;
+    }
+    APPEND_CHAR((SQChar)ch);
+    return 1;
+}
+#endif
+#else
+// Appends the code point `ch` to the string being built, encoded as UTF-8.
+// Returns the number of bytes appended (1 to 4), or 0 without appending
+// anything when `ch` is 0x110000 or greater.
+SQInteger SQLexer::AddUTF8(SQUnsignedInteger ch)
+{
+    // Ranges are tested from widest to narrowest; `ch` is unsigned, so each
+    // test only has to check the lower bound of its range.
+    if (ch >= 0x110000) {
+        return 0;
+    }
+    if (ch >= 0x10000) {
+        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        APPEND_CHAR((SQChar)((ch >> 18) | 0xF0));
+        APPEND_CHAR((SQChar)(((ch >> 12) & 0x3F) | 0x80));
+        APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
+        APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
+        return 4;
+    }
+    if (ch >= 0x800) {
+        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+        APPEND_CHAR((SQChar)((ch >> 12) | 0xE0));
+        APPEND_CHAR((SQChar)(((ch >> 6) & 0x3F) | 0x80));
+        APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
+        return 3;
+    }
+    if (ch >= 0x80) {
+        // 2 bytes: 110xxxxx 10xxxxxx
+        APPEND_CHAR((SQChar)((ch >> 6) | 0xC0));
+        APPEND_CHAR((SQChar)((ch & 0x3F) | 0x80));
+        return 2;
+    }
+    // 1 byte: plain 7-bit value.
+    APPEND_CHAR((char)ch);
+    return 1;
+}
+#endif
+
+// Reads the hexadecimal digits of a string escape sequence into `dest`.
+//
+// NEXT() is invoked once before any digit is examined; the callers in
+// ReadString invoke this while the current character is still the escape
+// letter (x, u or U). At most `maxdigits` hex digits are then copied and
+// `dest` is zero-terminated, so `dest` must have room for maxdigits + 1
+// characters. Error() is called when the first character examined is not a
+// hex digit.
+//
+// Returns the number of digits stored in `dest`.
+SQInteger SQLexer::ProcessStringHexEscape(SQChar *dest, SQInteger maxdigits)
+{
+    NEXT();
+    if (!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
+
+    SQInteger count = 0;
+    for (;;) {
+        // Stop at the first non-hex character or once the limit is reached.
+        if (!isxdigit(CUR_CHAR) || count >= maxdigits) break;
+        dest[count] = CUR_CHAR;
+        ++count;
+        NEXT();
+    }
+    dest[count] = 0;
+    return count;
+}
 
 SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
 {
@@ -295,7 +354,8 @@ SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
     if(IS_EOB()) return -1;
     for(;;) {
         while(CUR_CHAR != ndelim) {
-            switch(CUR_CHAR) {
+            SQInteger x = CUR_CHAR;
+            switch (x) {
             case SQUIRREL_EOB:
                 Error(_SC("unfinished string"));
                 return -1;
@@ -311,19 +371,29 @@ SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
                 else {
                     NEXT();
                     switch(CUR_CHAR) {
-                    case _SC('x'): NEXT(); {
-                        if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
-                        const SQInteger maxdigits = 4;
-                        SQChar temp[maxdigits+1];
-                        SQInteger n = 0;
-                        while(isxdigit(CUR_CHAR) && n < maxdigits) {
-                            temp[n] = CUR_CHAR;
-                            n++;
-                            NEXT();
-                        }
-                        temp[n] = 0;
-                        SQChar *sTemp;
-                        APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
+                    case _SC('x'):  {
+                        const SQInteger maxdigits = sizeof(SQChar) * 2;
+                        SQChar temp[maxdigits + 1];
+                        ProcessStringHexEscape(temp, maxdigits);
+                        SQChar *stemp;
+                        APPEND_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
+                    }
+                    break;
+                    case _SC('U'):  
+                    case _SC('u'):  {
+                        const SQInteger maxdigits = x == 'u' ? 4 : 8;
+                        SQChar temp[8 + 1];
+                        ProcessStringHexEscape(temp, maxdigits);
+                        SQChar *stemp;
+#ifdef SQUNICODE
+#if WCHAR_SIZE == 2
+                        AddUTF16(scstrtoul(temp, &stemp, 16));
+#else
+                        ADD_CHAR((SQChar)scstrtoul(temp, &stemp, 16));
+#endif
+#else
+                        AddUTF8(scstrtoul(temp, &stemp, 16));
+#endif
                     }
                     break;
                     case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;

+ 8 - 0
squirrel/sqlexer.h

@@ -24,6 +24,14 @@ private:
     void LexLineComment();
     SQInteger ReadID();
     void Next();
+    // Helpers used while lexing string escape sequences. Which encoder is
+    // declared depends on the character configuration of the build.
+#ifdef SQUNICODE
+#if WCHAR_SIZE == 2
+    // Appends code point `ch` as UTF-16 (one unit, or a surrogate pair for
+    // ch >= 0x10000); returns the number of units appended.
+    SQInteger AddUTF16(SQUnsignedInteger ch);
+#endif
+#else
+    // Appends code point `ch` as UTF-8; returns the number of bytes appended.
+    SQInteger AddUTF8(SQUnsignedInteger ch);
+#endif    
+    // Copies up to `maxdigits` hex digits from the input into `dest`,
+    // zero-terminates it and returns the digit count; `dest` needs room for
+    // maxdigits + 1 characters.
+    SQInteger ProcessStringHexEscape(SQChar *dest, SQInteger maxdigits);
     SQInteger _curtoken;
     SQTable *_keywords;
     SQBool _reached_eof;