Differences From Artifact [783ee3b0fe58558e]:
- File        
polemy/lex.d
- 2010-11-08 06:19:57 - part of checkin [61998c472a] on branch trunk - Introduced unittest helpers (assert_eq, assert_throw, etc). Mmigration to it is not done yet. (user: kinaba) [annotate]
 
 
To Artifact [0972f7a454ea8e4f]:
- File        
polemy/lex.d
- 2010-11-08 08:45:51 - part of checkin [8d297342aa] on branch trunk - Replaced Token.Kind with bool quoted (user: kinaba) [annotate]
 
 
   44          assert( !__traits(compiles, p.column  =222) );                                44          assert( !__traits(compiles, p.column  =222) );
   45  }                                                                                     45  }
   46                                                                                        46  
   47  /// Represents a lexer token                                                          47  /// Represents a lexer token
   48                                                                                        48  
   49  class Token                                                                           49  class Token
   50  {                                                                                     50  {
   51          /// currently we have three kinds of token                               <
   52          enum Kind {                                                              <
   53                  identifier, /// anything other than others                       <
   54                  stringLiteral, /// "string literal"                              <
   55                  number /// 42                                                    <
   56          };                                                                       <
   57          immutable LexPosition pos;  /// position where the token occurred in the |    51          immutable LexPosition pos;    /// Position where the token occurred in t
   58          immutable string      str;  /// the token string itself                  |    52          immutable string      str;    /// The token string itself
   59          immutable Kind        kind; /// which kind of token?                     <
                                                                                        >    53          immutable bool        quoted; /// Was it a "quoted" token or unquoted?
   60                                                                                        54  
   61          mixin SimpleConstructor;                                                      55          mixin SimpleConstructor;
   62          mixin SimpleCompare;                                                          56          mixin SimpleCompare;
   63  }                                                                                     57  }
   64                                                                                        58  
   65  unittest                                                                              59  unittest
   66  {                                                                                     60  {
   67          auto p = new immutable(LexPosition)("hello.cpp", 123, 45);                    61          auto p = new immutable(LexPosition)("hello.cpp", 123, 45);
   68          auto t = new Token(p, "class", Token.Kind.identifier);                   |    62          auto t = new Token(p, "class", false);
                                                                                        >    63          auto u = new Token(p, "class", true);
   69                                                                                        64  
   70          assert_eq( t.pos, p );                                                        65          assert_eq( t.pos, p );
   71          assert_eq( t.str, "class" );                                                  66          assert_eq( t.str, "class" );
                                                                                        >    67          assert( !t.quoted );
   72          assert_eq( t, new Token(p, "class", Token.Kind.identifier) );            |    68          assert_eq( t, new Token(p, "class", false) );
   73          assert_lt( t, new Token(p, "struct", Token.Kind.identifier) );           |    69          assert_lt( t, new Token(p, "struct", false) );
                                                                                        >    70          assert_ne( t, u );
                                                                                        >    71          assert( u.quoted );
   74                                                                                        72  
   75          assert( !__traits(compiles, new Token) );                                     73          assert( !__traits(compiles, new Token) );
   76          assert( !__traits(compiles, t.pos=p) );                                       74          assert( !__traits(compiles, t.pos=p) );
   77          assert( !__traits(compiles, t.str=789) );                                     75          assert( !__traits(compiles, t.str=789) );
                                                                                        >    76          assert( !__traits(compiles, t.quoted=true) );
   78  }                                                                                     77  }
   79                                                                                        78  
   80  /// Named Construtor for Lexer                                                        79  /// Named Construtor for Lexer
   81                                                                                        80  
   82  Lexer lexerFromFile(T...)( string filename, T rest )                                  81  Lexer lexerFromFile(T...)( string filename, T rest )
   83  {                                                                                     82  {
   84          return lexerFromString( std.file.readText(filename), filename, rest );        83          return lexerFromString( std.file.readText(filename), filename, rest );
................................................................................................................................................................................
  113                  std.exception.enforce(current, "Lexer has already reached the en     112                  std.exception.enforce(current, "Lexer has already reached the en
  114                  current = readNext();                                                113                  current = readNext();
  115          }                                                                            114          }
  116                                                                                       115  
  117          /// Range primitive                                                          116          /// Range primitive
  118          Lexer save() /*@property*/                                                   117          Lexer save() /*@property*/
  119          {                                                                            118          {
  120                  return new Lexer(buffer, filename, lineno, column, current);     |   119                  return new Lexer(this.tupleof);
  121          }                                                                            120          }
  122                                                                                       121  
  123  private: // implementation                                                           122  private: // implementation
  124                                                                                       123  
  125          string buffer;                                                               124          string buffer;
  126          string filename;                                                             125          string filename;
  127          int    lineno;                                                               126          int    lineno;
................................................................................................................................................................................
  227                                                  column = 1;                          226                                                  column = 1;
  228                                          }                                            227                                          }
  229                                          else                                         228                                          else
  230                                                  lit ~= c;                            229                                                  lit ~= c;
  231                                  }                                                    230                                  }
  232                                  if( !buffer.empty )                                  231                                  if( !buffer.empty )
  233                                          readChar();                                  232                                          readChar();
  234                                  return new Token(pos, lit, Token.Kind.stringLite |   233                                  return new Token(pos, lit, true);
  235                          }                                                            234                          }
  236                          else                                                         235                          else
  237                          {                                                            236                          {
  238                                  // normal symbol                                     237                                  // normal symbol
  239                                  auto pos = currentPosition();                        238                                  auto pos = currentPosition();
  240                                  auto str = ""~readChar();                            239                                  auto str = ""~readChar();
  241                                  return new Token(pos, str, Token.Kind.identifier |   240                                  return new Token(pos, str, false);
  242                          }                                                            241                          }
  243                  }                                                                    242                  }
  244                  else                                                                 243                  else
  245                  {                                                                    244                  {
  246                          auto pos = currentPosition();                                245                          auto pos = currentPosition();
  247                          int i = 0;                                                   246                          int i = 0;
  248                          while( i<buffer.length && !std.ctype.isspace(buffer[i])      247                          while( i<buffer.length && !std.ctype.isspace(buffer[i]) 
  249                                  ++i;                                                 248                                  ++i;
  250                          auto str = buffer[0 .. i];                                   249                          auto str = buffer[0 .. i];
  251                          buffer   = buffer[i .. $];                                   250                          buffer   = buffer[i .. $];
  252                          column  += i;                                                251                          column  += i;
  253                          bool isNumber = find!(`a<'0' || '9'<a`)(str).empty;      <
  254                          return new Token(pos, str, isNumber ? Token.Kind.number  |   252                          return new Token(pos, str, false);
  255                  }                                                                    253                  }
  256          }                                                                            254          }
  257                                                                                       255  
  258          bool isSymbol(char c)                                                        256          bool isSymbol(char c)
  259          {                                                                            257          {
  260                  return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_');      258                  return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_');
  261          }                                                                            259          }
................................................................................................................................................................................
  269  unittest                                                                             267  unittest
  270  {                                                                                    268  {
  271          assert( std.range.isForwardRange!(Lexer) );                                  269          assert( std.range.isForwardRange!(Lexer) );
  272  }                                                                                    270  }
  273                                                                                       271  
  274  unittest                                                                             272  unittest
  275  {                                                                                    273  {
  276          auto lex = lexerFromString("this        is a \t\n pen :-(   ");          |   274          auto lex = lexerFromString("this        is a \t\r\n pen :-(   ");
  277          Token[] ts = std.array.array(lex);                                           275          Token[] ts = std.array.array(lex);
  278                                                                                       276  
  279          assert( ts[0].pos.lineno == 1 );                                         |   277          assert_eq( ts[0].pos.lineno, 1 );
  280          assert( ts[0].pos.column == 1 );                                         |   278          assert_eq( ts[0].pos.column, 1 );
  281          assert( ts[0].kind == Token.Kind.identifier );                           |   279          assert(   !ts[0].quoted );
  282          assert( ts[0].str == "this" );                                           |   280          assert_eq( ts[0].str, "this" );
                                                                                        >   281  
                                                                                        >   282          assert_eq( ts[1].pos.lineno, 1 );
                                                                                        >   283          assert_eq( ts[1].pos.column, 6 );
                                                                                        >   284          assert(   !ts[1].quoted );
                                                                                        >   285          assert_eq( ts[1].str, "is" );
  283                                                                                       286  
  284          assert( ts[1].pos.lineno == 1 );                                         |   287          assert_eq( ts[2].pos.lineno, 1 );
  285          assert( ts[1].pos.column == 6 );                                         |   288          assert_eq( ts[2].pos.column, 9 );
  286          assert( ts[1].kind == Token.Kind.identifier );                           |   289          assert(   !ts[2].quoted );
  287          assert( ts[1].str == "is" );                                             |   290          assert_eq( ts[2].str, "a" );
  288                                                                                       291  
  289          assert( ts[2].pos.lineno == 1 );                                         <
  290          assert( ts[2].pos.column == 9 );                                         <
  291          assert( ts[2].kind == Token.Kind.identifier );                           <
  292          assert( ts[2].str == "a" );                                              <
  293                                                                                   <
  294          assert( ts[3].pos.lineno == 2 );                                         |   292          assert_eq( ts[3].pos.lineno, 2 );
  295          assert( ts[3].pos.column == 2 );                                         |   293          assert_eq( ts[3].pos.column, 2 );
  296          assert( ts[3].kind == Token.Kind.identifier );                           <
                                                                                        >   294          assert(   !ts[3].quoted );
  297          assert( ts[3].str == "pen" );                                            |   295          assert_eq( ts[3].str, "pen" );
  298                                                                                       296  
  299          // consecutive symbols are always separated                                  297          // consecutive symbols are always separated
  300          // hence, no "++" or "<<" or ...                                             298          // hence, no "++" or "<<" or ...
  301                                                                                       299  
  302          assert( ts[4].pos.lineno == 2 );                                         |   300          assert_eq( ts[4].pos.lineno, 2 );
  303          assert( ts[4].pos.column == 6 );                                         |   301          assert_eq( ts[4].pos.column, 6 );
  304          assert( ts[4].str == ":" );                                              |   302          assert_eq( ts[4].str, ":" );
                                                                                        >   303  
                                                                                        >   304          assert_eq( ts[5].pos.lineno, 2 );
                                                                                        >   305          assert_eq( ts[5].pos.column, 7 );
                                                                                        >   306          assert_eq( ts[5].str, "-" );
  305                                                                                       307  
  306          assert( ts[5].pos.lineno == 2 );                                         |   308          assert_eq( ts[6].pos.lineno, 2 );
  307          assert( ts[5].pos.column == 7 );                                         |   309          assert_eq( ts[6].pos.column, 8 );
  308          assert( ts[5].str == "-" );                                              |   310          assert_eq( ts[6].str, "(" );
  309                                                                                       311  
  310          assert( ts[6].pos.lineno == 2 );                                         <
  311          assert( ts[6].pos.column == 8 );                                         <
  312          assert( ts[6].str == "(" );                                              <
  313                                                                                   <
  314          assert( ts.length == 7 );                                                |   312          assert_eq( ts.length, 7 );
  315  }                                                                                    313  }
  316                                                                                       314  
  317  unittest                                                                             315  unittest
  318  {                                                                                    316  {
  319          auto lex2 = lexerFromString(" a12\n3a 5 ");                                  317          auto lex2 = lexerFromString(" a12\n3a 5 ");
  320          assert( lex2.front.str == "a12" );                                       |   318          assert_eq( lex2.front.str, "a12" );
  321          assert( lex2.front.kind == Token.Kind.identifier );                      <
  322          lex2.popFront;                                                               319          lex2.popFront;
  323          auto lex3 = lex2.save;                                                       320          auto lex3 = lex2.save;
  324          assert( lex2.front.str == "3a" );                                        |   321          assert_eq( lex2.front.str, "3a" );
  325          assert( lex2.front.kind == Token.Kind.identifier );                      <
  326          lex2.popFront;                                                               322          lex2.popFront;
  327          assert( lex3.front.str == "3a" );                                        |   323          assert_eq( lex3.front.str, "3a" );
  328          assert( lex3.front.kind == Token.Kind.identifier );                      <
  329          assert( lex2.front.str == "5" );                                         |   324          assert_eq( lex2.front.str, "5" );
  330          assert( lex2.front.kind == Token.Kind.number );                          <
  331          lex2.popFront;                                                               325          lex2.popFront;
  332          lex3.popFront;                                                               326          lex3.popFront;
  333          assert( lex2.empty );                                                        327          assert( lex2.empty );
  334          assert( !lex3.empty );                                                       328          assert( !lex3.empty );
  335          assert( lex3.front.str == "5" );                                         |   329          assert_eq( lex3.front.str, "5" );
  336          assert( lex3.front.kind == Token.Kind.number );                          <
  337  }                                                                                    330  }
  338                                                                                       331  
  339  unittest                                                                             332  unittest
  340  {                                                                                    333  {
  341  //!! be sure to run the unittest on the root of the source directory                 334  //!! be sure to run the unittest on the root of the source directory
  342          auto lexf = lexerFromFile("polemy/lex.d");                                   335          auto lexf = lexerFromFile("polemy/lex.d");
  343          lexf = find!`a.str == "module"`(lexf);                                       336          lexf = find!`a.str == "module"`(lexf);
  344          assert( lexf.front.str == "module", lexf.front.str );                    |   337          assert_eq( lexf.front.str, "module" );
  345          assert( lexf.front.pos.filename == "polemy/lex.d" );                     |   338          assert_eq( lexf.front.pos.filename, "polemy/lex.d" );
  346          assert( lexf.front.pos.lineno == 7 );                                    |   339          assert_eq( lexf.front.pos.lineno, 7 );
  347          assert( lexf.front.pos.column == 1 );                                    |   340          assert_eq( lexf.front.pos.column, 1 );
                                                                                        >   341          lexf.popFront;
                                                                                        >   342          assert_eq( lexf.front.str, "polemy" );
                                                                                        >   343          assert_eq( lexf.front.pos.lineno, 7 );
                                                                                        >   344          assert_eq( lexf.front.pos.column, 8 );
                                                                                        >   345          lexf.popFront;
                                                                                        >   346          assert_eq( lexf.front.str, "." );
  348          lexf.popFront;                                                               347          lexf.popFront;
  349          assert( lexf.front.str == "polemy" );                                    |   348          assert_eq( lexf.front.str, "lex" );
  350          assert( lexf.front.pos.lineno == 7 );                                    <
  351          assert( lexf.front.pos.column == 8 );                                    <
  352          lexf.popFront;                                                               349          lexf.popFront;
  353          assert( lexf.front.str == "." );                                         |   350          assert_eq( lexf.front.str, ";" );
  354          lexf.popFront;                                                               351          lexf.popFront;
  355          assert( lexf.front.str == "lex" );                                       <
  356          lexf.popFront;                                                           <
  357          assert( lexf.front.str == ";" );                                         |   352          assert_eq( lexf.front.str, "import" );
  358          lexf.popFront;                                                           <
  359          assert( lexf.front.str == "import" );                                    <
  360          assert( lexf.front.pos.lineno == 8 );                                    |   353          assert_eq( lexf.front.pos.lineno, 8 );
  361          assert( lexf.front.pos.column == 1 );                                    |   354          assert_eq( lexf.front.pos.column, 1 );
  362  }                                                                                    355  }
  363                                                                                       356  
  364  unittest                                                                             357  unittest
  365  {                                                                                    358  {
  366          auto lex = lexerFromString(`my # comment should                          |   359          auto lex = lexerFromString(`my # comment should`~"\r\n"~`# hey!!
  367  # hey!!                                                                          <
  368  be ignored.                                                                          360  be ignored.
  369  hahaha"hihihi""hu\\\"huhu"#123 aa                                                    361  hahaha"hihihi""hu\\\"huhu"#123 aa
  370  123 aa "aaa                                                                      |   362  123 aa "aaa`~"\r\n"~`bbb # 123`~"\r\n"~`eee"
  371  bbb # 123                                                                        <
  372  eee"                                                                             <
  373  zzz                                                                                  363  zzz
  374  `);                                                                                  364  `);
  375          Token[] ts = std.array.array(lex);                                           365          Token[] ts = std.array.array(lex);
  376          assert( ts[0].str == "my" );                                             |   366          assert_eq( ts[0].str, "my" );
  377          assert( ts[0].pos.lineno == 1 );                                         |   367          assert_eq( ts[0].pos.lineno, 1 );
                                                                                        >   368          assert(   !ts[0].quoted );
  378          assert( ts[1].str == "be" );                                             |   369          assert_eq( ts[1].str, "be" );
  379          assert( ts[1].pos.lineno == 3 );                                         |   370          assert_eq( ts[1].pos.lineno, 3 );
                                                                                        >   371          assert(   !ts[1].quoted );
  380          assert( ts[2].str == "ignored" );                                        |   372          assert_eq( ts[2].str, "ignored" );
                                                                                        >   373          assert(   !ts[2].quoted );
  381          assert( ts[3].str == "." );                                              |   374          assert_eq( ts[3].str, "." );
                                                                                        >   375          assert(   !ts[3].quoted );
  382          assert( ts[4].str == "hahaha" );                                         |   376          assert_eq( ts[4].str, "hahaha" );
  383          assert( ts[4].pos.lineno == 4 );                                         |   377          assert_eq( ts[4].pos.lineno, 4 );
  384          assert( ts[4].kind == Token.Kind.identifier );                           <
                                                                                        >   378          assert(   !ts[4].quoted );
  385          assert( ts[5].str == "hihihi" );                                         |   379          assert_eq( ts[5].str, "hihihi" );
  386          assert( ts[5].pos.lineno == 4 );                                         |   380          assert_eq( ts[5].pos.lineno, 4 );
  387          assert( ts[5].kind == Token.Kind.stringLiteral );                        <
                                                                                        >   381          assert(    ts[5].quoted );
  388          assert( ts[6].str == `hu\"huhu` );                                       |   382          assert_eq( ts[6].str, `hu\"huhu` );
  389          assert( ts[6].kind == Token.Kind.stringLiteral );                        <
  390          assert( ts[6].pos.lineno == 4 );                                         |   383          assert_eq( ts[6].pos.lineno, 4 );
                                                                                        >   384          assert(    ts[6].quoted );
  391          assert( ts[7].str == "123" );                                            |   385          assert_eq( ts[7].str, "123" );
  392          assert( ts[7].pos.lineno == 5 );                                         |   386          assert_eq( ts[7].pos.lineno, 5 );
  393          assert( ts[7].kind == Token.Kind.number );                               <
  394          assert( ts[8].str == "aa" );                                             |   387          assert_eq( ts[8].str, "aa" );
  395          assert( ts[9].pos.lineno == 5 );                                         |   388          assert_eq( ts[9].pos.lineno, 5 );
  396          assert( ts[9].str == "aaa\nbbb # 123\neee" );                            |   389          assert_eq( ts[9].str, "aaa\nbbb # 123\neee" );
  397          assert( ts[9].kind == Token.Kind.stringLiteral );                        <
                                                                                        >   390          assert(    ts[9].quoted );
  398          assert( ts[10].pos.lineno == 8 );                                        |   391          assert_eq( ts[10].pos.lineno, 8 );
                                                                                        >   392          assert(   !ts[10].quoted );
  399          assert( ts.length == 11 );                                               |   393          assert_eq( ts.length, 11 );
  400  }                                                                                    394  }