Differences From Artifact [783ee3b0fe58558e]:
- File        
polemy/lex.d
- 2010-11-08 06:19:57 - part of checkin [61998c472a] on branch trunk - Introduced unittest helpers (assert_eq, assert_throw, etc). Migration to them is not done yet. (user: kinaba) [annotate]
 
 
To Artifact [0972f7a454ea8e4f]:
- File        
polemy/lex.d
- 2010-11-08 08:45:51 - part of checkin [8d297342aa] on branch trunk - Replaced Token.Kind with bool quoted (user: kinaba) [annotate]
 
 
    44     44    assert( !__traits(compiles, p.column  =222) );
    45     45   }
    46     46   
    47     47   /// Represents a lexer token
    48     48   
    49     49   class Token
    50     50   {
    51         - /// currently we have three kinds of token
    52         - enum Kind {
    53         -  identifier, /// anything other than others
    54         -  stringLiteral, /// "string literal"
    55         -  number /// 42
    56         - };
    57         - immutable LexPosition pos;  /// position where the token occurred in the source
    58         - immutable string      str;  /// the token string itself
    59         - immutable Kind        kind; /// which kind of token?
           51  + immutable LexPosition pos;    /// Position where the token occurred in the source
           52  + immutable string      str;    /// The token string itself
           53  + immutable bool        quoted; /// Was it a "quoted" token or unquoted?
    60     54   
    61     55    mixin SimpleConstructor;
    62     56    mixin SimpleCompare;
    63     57   }
    64     58   
    65     59   unittest
    66     60   {
    67     61    auto p = new immutable(LexPosition)("hello.cpp", 123, 45);
    68         - auto t = new Token(p, "class", Token.Kind.identifier);
           62  + auto t = new Token(p, "class", false);
           63  + auto u = new Token(p, "class", true);
    69     64   
    70     65    assert_eq( t.pos, p );
    71     66    assert_eq( t.str, "class" );
    72         - assert_eq( t, new Token(p, "class", Token.Kind.identifier) );
    73         - assert_lt( t, new Token(p, "struct", Token.Kind.identifier) );
           67  + assert( !t.quoted );
           68  + assert_eq( t, new Token(p, "class", false) );
           69  + assert_lt( t, new Token(p, "struct", false) );
           70  + assert_ne( t, u );
           71  + assert( u.quoted );
    74     72   
    75     73    assert( !__traits(compiles, new Token) );
    76     74    assert( !__traits(compiles, t.pos=p) );
    77     75    assert( !__traits(compiles, t.str=789) );
           76  + assert( !__traits(compiles, t.quoted=true) );
    78     77   }
    79     78   
    80     79   /// Named Constructor for Lexer
    81     80   
    82     81   Lexer lexerFromFile(T...)( string filename, T rest )
    83     82   {
    84     83    return lexerFromString( std.file.readText(filename), filename, rest );
................................................................................
   113    112     std.exception.enforce(current, "Lexer has already reached the end");
   114    113     current = readNext();
   115    114    }
   116    115   
   117    116    /// Range primitive
   118    117    Lexer save() /*@property*/
   119    118    {
   120         -  return new Lexer(buffer, filename, lineno, column, current);
          119  +  return new Lexer(this.tupleof);
   121    120    }
   122    121   
   123    122   private: // implementation
   124    123   
   125    124    string buffer;
   126    125    string filename;
   127    126    int    lineno;
................................................................................
   227    226         column = 1;
   228    227        }
   229    228        else
   230    229         lit ~= c;
   231    230       }
   232    231       if( !buffer.empty )
   233    232        readChar();
   234         -    return new Token(pos, lit, Token.Kind.stringLiteral);
          233  +    return new Token(pos, lit, true);
   235    234      }
   236    235      else
   237    236      {
   238    237       // normal symbol
   239    238       auto pos = currentPosition();
   240    239       auto str = ""~readChar();
   241         -    return new Token(pos, str, Token.Kind.identifier);
          240  +    return new Token(pos, str, false);
   242    241      }
   243    242     }
   244    243     else
   245    244     {
   246    245      auto pos = currentPosition();
   247    246      int i = 0;
   248    247      while( i<buffer.length && !std.ctype.isspace(buffer[i]) && !isSymbol(buffer[i]) )
   249    248       ++i;
   250    249      auto str = buffer[0 .. i];
   251    250      buffer   = buffer[i .. $];
   252    251      column  += i;
   253         -   bool isNumber = find!(`a<'0' || '9'<a`)(str).empty;
   254         -   return new Token(pos, str, isNumber ? Token.Kind.number : Token.Kind.identifier);
          252  +   return new Token(pos, str, false);
   255    253     }
   256    254    }
   257    255   
   258    256    bool isSymbol(char c)
   259    257    {
   260    258     return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_');
   261    259    }
................................................................................
   269    267   unittest
   270    268   {
   271    269    assert( std.range.isForwardRange!(Lexer) );
   272    270   }
   273    271   
   274    272   unittest
   275    273   {
   276         - auto lex = lexerFromString("this is a \t\n pen :-(   ");
          274  + auto lex = lexerFromString("this is a \t\r\n pen :-(   ");
   277    275    Token[] ts = std.array.array(lex);
   278    276   
   279         - assert( ts[0].pos.lineno == 1 );
   280         - assert( ts[0].pos.column == 1 );
   281         - assert( ts[0].kind == Token.Kind.identifier );
   282         - assert( ts[0].str == "this" );
          277  + assert_eq( ts[0].pos.lineno, 1 );
          278  + assert_eq( ts[0].pos.column, 1 );
          279  + assert(   !ts[0].quoted );
          280  + assert_eq( ts[0].str, "this" );
          281  +
          282  + assert_eq( ts[1].pos.lineno, 1 );
          283  + assert_eq( ts[1].pos.column, 6 );
          284  + assert(   !ts[1].quoted );
          285  + assert_eq( ts[1].str, "is" );
   283    286   
   284         - assert( ts[1].pos.lineno == 1 );
   285         - assert( ts[1].pos.column == 6 );
   286         - assert( ts[1].kind == Token.Kind.identifier );
   287         - assert( ts[1].str == "is" );
          287  + assert_eq( ts[2].pos.lineno, 1 );
          288  + assert_eq( ts[2].pos.column, 9 );
          289  + assert(   !ts[2].quoted );
          290  + assert_eq( ts[2].str, "a" );
   288    291   
   289         - assert( ts[2].pos.lineno == 1 );
   290         - assert( ts[2].pos.column == 9 );
   291         - assert( ts[2].kind == Token.Kind.identifier );
   292         - assert( ts[2].str == "a" );
   293         -
   294         - assert( ts[3].pos.lineno == 2 );
   295         - assert( ts[3].pos.column == 2 );
   296         - assert( ts[3].kind == Token.Kind.identifier );
   297         - assert( ts[3].str == "pen" );
          292  + assert_eq( ts[3].pos.lineno, 2 );
          293  + assert_eq( ts[3].pos.column, 2 );
          294  + assert(   !ts[3].quoted );
          295  + assert_eq( ts[3].str, "pen" );
   298    296   
   299    297    // consecutive symbols are always separated
   300    298    // hence, no "++" or "<<" or ...
   301    299   
   302         - assert( ts[4].pos.lineno == 2 );
   303         - assert( ts[4].pos.column == 6 );
   304         - assert( ts[4].str == ":" );
          300  + assert_eq( ts[4].pos.lineno, 2 );
          301  + assert_eq( ts[4].pos.column, 6 );
          302  + assert_eq( ts[4].str, ":" );
          303  +
          304  + assert_eq( ts[5].pos.lineno, 2 );
          305  + assert_eq( ts[5].pos.column, 7 );
          306  + assert_eq( ts[5].str, "-" );
   305    307   
   306         - assert( ts[5].pos.lineno == 2 );
   307         - assert( ts[5].pos.column == 7 );
   308         - assert( ts[5].str == "-" );
          308  + assert_eq( ts[6].pos.lineno, 2 );
          309  + assert_eq( ts[6].pos.column, 8 );
          310  + assert_eq( ts[6].str, "(" );
   309    311   
   310         - assert( ts[6].pos.lineno == 2 );
   311         - assert( ts[6].pos.column == 8 );
   312         - assert( ts[6].str == "(" );
   313         -
   314         - assert( ts.length == 7 );
          312  + assert_eq( ts.length, 7 );
   315    313   }
   316    314   
   317    315   unittest
   318    316   {
   319    317    auto lex2 = lexerFromString(" a12\n3a 5 ");
   320         - assert( lex2.front.str == "a12" );
   321         - assert( lex2.front.kind == Token.Kind.identifier );
          318  + assert_eq( lex2.front.str, "a12" );
   322    319    lex2.popFront;
   323    320    auto lex3 = lex2.save;
   324         - assert( lex2.front.str == "3a" );
   325         - assert( lex2.front.kind == Token.Kind.identifier );
          321  + assert_eq( lex2.front.str, "3a" );
   326    322    lex2.popFront;
   327         - assert( lex3.front.str == "3a" );
   328         - assert( lex3.front.kind == Token.Kind.identifier );
   329         - assert( lex2.front.str == "5" );
   330         - assert( lex2.front.kind == Token.Kind.number );
          323  + assert_eq( lex3.front.str, "3a" );
          324  + assert_eq( lex2.front.str, "5" );
   331    325    lex2.popFront;
   332    326    lex3.popFront;
   333    327    assert( lex2.empty );
   334    328    assert( !lex3.empty );
   335         - assert( lex3.front.str == "5" );
   336         - assert( lex3.front.kind == Token.Kind.number );
          329  + assert_eq( lex3.front.str, "5" );
   337    330   }
   338    331   
   339    332   unittest
   340    333   {
   341    334   //!! be sure to run the unittest from the root of the source directory
   342    335    auto lexf = lexerFromFile("polemy/lex.d");
   343    336    lexf = find!`a.str == "module"`(lexf);
   344         - assert( lexf.front.str == "module", lexf.front.str );
   345         - assert( lexf.front.pos.filename == "polemy/lex.d" );
   346         - assert( lexf.front.pos.lineno == 7 );
   347         - assert( lexf.front.pos.column == 1 );
          337  + assert_eq( lexf.front.str, "module" );
          338  + assert_eq( lexf.front.pos.filename, "polemy/lex.d" );
          339  + assert_eq( lexf.front.pos.lineno, 7 );
          340  + assert_eq( lexf.front.pos.column, 1 );
          341  + lexf.popFront;
          342  + assert_eq( lexf.front.str, "polemy" );
          343  + assert_eq( lexf.front.pos.lineno, 7 );
          344  + assert_eq( lexf.front.pos.column, 8 );
          345  + lexf.popFront;
          346  + assert_eq( lexf.front.str, "." );
   348    347    lexf.popFront;
   349         - assert( lexf.front.str == "polemy" );
   350         - assert( lexf.front.pos.lineno == 7 );
   351         - assert( lexf.front.pos.column == 8 );
          348  + assert_eq( lexf.front.str, "lex" );
   352    349    lexf.popFront;
   353         - assert( lexf.front.str == "." );
          350  + assert_eq( lexf.front.str, ";" );
   354    351    lexf.popFront;
   355         - assert( lexf.front.str == "lex" );
   356         - lexf.popFront;
   357         - assert( lexf.front.str == ";" );
   358         - lexf.popFront;
   359         - assert( lexf.front.str == "import" );
   360         - assert( lexf.front.pos.lineno == 8 );
   361         - assert( lexf.front.pos.column == 1 );
          352  + assert_eq( lexf.front.str, "import" );
          353  + assert_eq( lexf.front.pos.lineno, 8 );
          354  + assert_eq( lexf.front.pos.column, 1 );
   362    355   }
   363    356   
   364    357   unittest
   365    358   {
   366         - auto lex = lexerFromString(`my # comment should
   367         -# hey!!
          359  + auto lex = lexerFromString(`my # comment should`~"\r\n"~`# hey!!
   368    360   be ignored.
   369    361   hahaha"hihihi""hu\\\"huhu"#123 aa
   370         -123 aa "aaa
   371         -bbb # 123
   372         -eee"
          362  +123 aa "aaa`~"\r\n"~`bbb # 123`~"\r\n"~`eee"
   373    363   zzz
   374    364   `);
   375    365    Token[] ts = std.array.array(lex);
   376         - assert( ts[0].str == "my" );
   377         - assert( ts[0].pos.lineno == 1 );
   378         - assert( ts[1].str == "be" );
   379         - assert( ts[1].pos.lineno == 3 );
   380         - assert( ts[2].str == "ignored" );
   381         - assert( ts[3].str == "." );
   382         - assert( ts[4].str == "hahaha" );
   383         - assert( ts[4].pos.lineno == 4 );
   384         - assert( ts[4].kind == Token.Kind.identifier );
   385         - assert( ts[5].str == "hihihi" );
   386         - assert( ts[5].pos.lineno == 4 );
   387         - assert( ts[5].kind == Token.Kind.stringLiteral );
   388         - assert( ts[6].str == `hu\"huhu` );
   389         - assert( ts[6].kind == Token.Kind.stringLiteral );
   390         - assert( ts[6].pos.lineno == 4 );
   391         - assert( ts[7].str == "123" );
   392         - assert( ts[7].pos.lineno == 5 );
   393         - assert( ts[7].kind == Token.Kind.number );
   394         - assert( ts[8].str == "aa" );
   395         - assert( ts[9].pos.lineno == 5 );
   396         - assert( ts[9].str == "aaa\nbbb # 123\neee" );
   397         - assert( ts[9].kind == Token.Kind.stringLiteral );
   398         - assert( ts[10].pos.lineno == 8 );
   399         - assert( ts.length == 11 );
          366  + assert_eq( ts[0].str, "my" );
          367  + assert_eq( ts[0].pos.lineno, 1 );
          368  + assert(   !ts[0].quoted );
          369  + assert_eq( ts[1].str, "be" );
          370  + assert_eq( ts[1].pos.lineno, 3 );
          371  + assert(   !ts[1].quoted );
          372  + assert_eq( ts[2].str, "ignored" );
          373  + assert(   !ts[2].quoted );
          374  + assert_eq( ts[3].str, "." );
          375  + assert(   !ts[3].quoted );
          376  + assert_eq( ts[4].str, "hahaha" );
          377  + assert_eq( ts[4].pos.lineno, 4 );
          378  + assert(   !ts[4].quoted );
          379  + assert_eq( ts[5].str, "hihihi" );
          380  + assert_eq( ts[5].pos.lineno, 4 );
          381  + assert(    ts[5].quoted );
          382  + assert_eq( ts[6].str, `hu\"huhu` );
          383  + assert_eq( ts[6].pos.lineno, 4 );
          384  + assert(    ts[6].quoted );
          385  + assert_eq( ts[7].str, "123" );
          386  + assert_eq( ts[7].pos.lineno, 5 );
          387  + assert_eq( ts[8].str, "aa" );
          388  + assert_eq( ts[9].pos.lineno, 5 );
          389  + assert_eq( ts[9].str, "aaa\nbbb # 123\neee" );
          390  + assert(    ts[9].quoted );
          391  + assert_eq( ts[10].pos.lineno, 8 );
          392  + assert(   !ts[10].quoted );
          393  + assert_eq( ts.length, 11 );
   400    394   }