Differences From Artifact [783ee3b0fe58558e]:
- File
polemy/lex.d
- 2010-11-08 06:19:57 - part of checkin [61998c472a] on branch trunk - Introduced unittest helpers (assert_eq, assert_throw, etc). Migration to it is not done yet. (user: kinaba) [annotate]
To Artifact [0972f7a454ea8e4f]:
- File
polemy/lex.d
- 2010-11-08 08:45:51 - part of checkin [8d297342aa] on branch trunk - Replaced Token.Kind with bool quoted (user: kinaba) [annotate]
44 assert( !__traits(compiles, p.column =222) ); 44 assert( !__traits(compiles, p.column =222) );
45 } 45 }
46 46
47 /// Represents a lexer token 47 /// Represents a lexer token
48 48
49 class Token 49 class Token
50 { 50 {
51 /// currently we have three kinds of token <
52 enum Kind { <
53 identifier, /// anything other than others <
54 stringLiteral, /// "string literal" <
55 number /// 42 <
56 }; <
57 immutable LexPosition pos; /// position where the token occurred in the | 51 immutable LexPosition pos; /// Position where the token occurred in t
58 immutable string str; /// the token string itself | 52 immutable string str; /// The token string itself
59 immutable Kind kind; /// which kind of token? <
> 53 immutable bool quoted; /// Was it a "quoted" token or unquoted?
60 54
61 mixin SimpleConstructor; 55 mixin SimpleConstructor;
62 mixin SimpleCompare; 56 mixin SimpleCompare;
63 } 57 }
64 58
65 unittest 59 unittest
66 { 60 {
67 auto p = new immutable(LexPosition)("hello.cpp", 123, 45); 61 auto p = new immutable(LexPosition)("hello.cpp", 123, 45);
68 auto t = new Token(p, "class", Token.Kind.identifier); | 62 auto t = new Token(p, "class", false);
> 63 auto u = new Token(p, "class", true);
69 64
70 assert_eq( t.pos, p ); 65 assert_eq( t.pos, p );
71 assert_eq( t.str, "class" ); 66 assert_eq( t.str, "class" );
> 67 assert( !t.quoted );
72 assert_eq( t, new Token(p, "class", Token.Kind.identifier) ); | 68 assert_eq( t, new Token(p, "class", false) );
73 assert_lt( t, new Token(p, "struct", Token.Kind.identifier) ); | 69 assert_lt( t, new Token(p, "struct", false) );
> 70 assert_ne( t, u );
> 71 assert( u.quoted );
74 72
75 assert( !__traits(compiles, new Token) ); 73 assert( !__traits(compiles, new Token) );
76 assert( !__traits(compiles, t.pos=p) ); 74 assert( !__traits(compiles, t.pos=p) );
77 assert( !__traits(compiles, t.str=789) ); 75 assert( !__traits(compiles, t.str=789) );
> 76 assert( !__traits(compiles, t.quoted=true) );
78 } 77 }
79 78
80 /// Named Constructor for Lexer 79 /// Named Constructor for Lexer
81 80
82 Lexer lexerFromFile(T...)( string filename, T rest ) 81 Lexer lexerFromFile(T...)( string filename, T rest )
83 { 82 {
84 return lexerFromString( std.file.readText(filename), filename, rest ); 83 return lexerFromString( std.file.readText(filename), filename, rest );
................................................................................................................................................................................
113 std.exception.enforce(current, "Lexer has already reached the en 112 std.exception.enforce(current, "Lexer has already reached the en
114 current = readNext(); 113 current = readNext();
115 } 114 }
116 115
117 /// Range primitive 116 /// Range primitive
118 Lexer save() /*@property*/ 117 Lexer save() /*@property*/
119 { 118 {
120 return new Lexer(buffer, filename, lineno, column, current); | 119 return new Lexer(this.tupleof);
121 } 120 }
122 121
123 private: // implementation 122 private: // implementation
124 123
125 string buffer; 124 string buffer;
126 string filename; 125 string filename;
127 int lineno; 126 int lineno;
................................................................................................................................................................................
227 column = 1; 226 column = 1;
228 } 227 }
229 else 228 else
230 lit ~= c; 229 lit ~= c;
231 } 230 }
232 if( !buffer.empty ) 231 if( !buffer.empty )
233 readChar(); 232 readChar();
234 return new Token(pos, lit, Token.Kind.stringLite | 233 return new Token(pos, lit, true);
235 } 234 }
236 else 235 else
237 { 236 {
238 // normal symbol 237 // normal symbol
239 auto pos = currentPosition(); 238 auto pos = currentPosition();
240 auto str = ""~readChar(); 239 auto str = ""~readChar();
241 return new Token(pos, str, Token.Kind.identifier | 240 return new Token(pos, str, false);
242 } 241 }
243 } 242 }
244 else 243 else
245 { 244 {
246 auto pos = currentPosition(); 245 auto pos = currentPosition();
247 int i = 0; 246 int i = 0;
248 while( i<buffer.length && !std.ctype.isspace(buffer[i]) 247 while( i<buffer.length && !std.ctype.isspace(buffer[i])
249 ++i; 248 ++i;
250 auto str = buffer[0 .. i]; 249 auto str = buffer[0 .. i];
251 buffer = buffer[i .. $]; 250 buffer = buffer[i .. $];
252 column += i; 251 column += i;
253 bool isNumber = find!(`a<'0' || '9'<a`)(str).empty; <
254 return new Token(pos, str, isNumber ? Token.Kind.number | 252 return new Token(pos, str, false);
255 } 253 }
256 } 254 }
257 255
258 bool isSymbol(char c) 256 bool isSymbol(char c)
259 { 257 {
260 return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_'); 258 return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_');
261 } 259 }
................................................................................................................................................................................
269 unittest 267 unittest
270 { 268 {
271 assert( std.range.isForwardRange!(Lexer) ); 269 assert( std.range.isForwardRange!(Lexer) );
272 } 270 }
273 271
274 unittest 272 unittest
275 { 273 {
276 auto lex = lexerFromString("this is a \t\n pen :-( "); | 274 auto lex = lexerFromString("this is a \t\r\n pen :-( ");
277 Token[] ts = std.array.array(lex); 275 Token[] ts = std.array.array(lex);
278 276
279 assert( ts[0].pos.lineno == 1 ); | 277 assert_eq( ts[0].pos.lineno, 1 );
280 assert( ts[0].pos.column == 1 ); | 278 assert_eq( ts[0].pos.column, 1 );
281 assert( ts[0].kind == Token.Kind.identifier ); | 279 assert( !ts[0].quoted );
282 assert( ts[0].str == "this" ); | 280 assert_eq( ts[0].str, "this" );
> 281
> 282 assert_eq( ts[1].pos.lineno, 1 );
> 283 assert_eq( ts[1].pos.column, 6 );
> 284 assert( !ts[1].quoted );
> 285 assert_eq( ts[1].str, "is" );
283 286
284 assert( ts[1].pos.lineno == 1 ); | 287 assert_eq( ts[2].pos.lineno, 1 );
285 assert( ts[1].pos.column == 6 ); | 288 assert_eq( ts[2].pos.column, 9 );
286 assert( ts[1].kind == Token.Kind.identifier ); | 289 assert( !ts[2].quoted );
287 assert( ts[1].str == "is" ); | 290 assert_eq( ts[2].str, "a" );
288 291
289 assert( ts[2].pos.lineno == 1 ); <
290 assert( ts[2].pos.column == 9 ); <
291 assert( ts[2].kind == Token.Kind.identifier ); <
292 assert( ts[2].str == "a" ); <
293 <
294 assert( ts[3].pos.lineno == 2 ); | 292 assert_eq( ts[3].pos.lineno, 2 );
295 assert( ts[3].pos.column == 2 ); | 293 assert_eq( ts[3].pos.column, 2 );
296 assert( ts[3].kind == Token.Kind.identifier ); <
> 294 assert( !ts[3].quoted );
297 assert( ts[3].str == "pen" ); | 295 assert_eq( ts[3].str, "pen" );
298 296
299 // consecutive symbols are always separated 297 // consecutive symbols are always separated
300 // hence, no "++" or "<<" or ... 298 // hence, no "++" or "<<" or ...
301 299
302 assert( ts[4].pos.lineno == 2 ); | 300 assert_eq( ts[4].pos.lineno, 2 );
303 assert( ts[4].pos.column == 6 ); | 301 assert_eq( ts[4].pos.column, 6 );
304 assert( ts[4].str == ":" ); | 302 assert_eq( ts[4].str, ":" );
> 303
> 304 assert_eq( ts[5].pos.lineno, 2 );
> 305 assert_eq( ts[5].pos.column, 7 );
> 306 assert_eq( ts[5].str, "-" );
305 307
306 assert( ts[5].pos.lineno == 2 ); | 308 assert_eq( ts[6].pos.lineno, 2 );
307 assert( ts[5].pos.column == 7 ); | 309 assert_eq( ts[6].pos.column, 8 );
308 assert( ts[5].str == "-" ); | 310 assert_eq( ts[6].str, "(" );
309 311
310 assert( ts[6].pos.lineno == 2 ); <
311 assert( ts[6].pos.column == 8 ); <
312 assert( ts[6].str == "(" ); <
313 <
314 assert( ts.length == 7 ); | 312 assert_eq( ts.length, 7 );
315 } 313 }
316 314
317 unittest 315 unittest
318 { 316 {
319 auto lex2 = lexerFromString(" a12\n3a 5 "); 317 auto lex2 = lexerFromString(" a12\n3a 5 ");
320 assert( lex2.front.str == "a12" ); | 318 assert_eq( lex2.front.str, "a12" );
321 assert( lex2.front.kind == Token.Kind.identifier ); <
322 lex2.popFront; 319 lex2.popFront;
323 auto lex3 = lex2.save; 320 auto lex3 = lex2.save;
324 assert( lex2.front.str == "3a" ); | 321 assert_eq( lex2.front.str, "3a" );
325 assert( lex2.front.kind == Token.Kind.identifier ); <
326 lex2.popFront; 322 lex2.popFront;
327 assert( lex3.front.str == "3a" ); | 323 assert_eq( lex3.front.str, "3a" );
328 assert( lex3.front.kind == Token.Kind.identifier ); <
329 assert( lex2.front.str == "5" ); | 324 assert_eq( lex2.front.str, "5" );
330 assert( lex2.front.kind == Token.Kind.number ); <
331 lex2.popFront; 325 lex2.popFront;
332 lex3.popFront; 326 lex3.popFront;
333 assert( lex2.empty ); 327 assert( lex2.empty );
334 assert( !lex3.empty ); 328 assert( !lex3.empty );
335 assert( lex3.front.str == "5" ); | 329 assert_eq( lex3.front.str, "5" );
336 assert( lex3.front.kind == Token.Kind.number ); <
337 } 330 }
338 331
339 unittest 332 unittest
340 { 333 {
341 //!! be sure to run the unittest on the root of the source directory 334 //!! be sure to run the unittest on the root of the source directory
342 auto lexf = lexerFromFile("polemy/lex.d"); 335 auto lexf = lexerFromFile("polemy/lex.d");
343 lexf = find!`a.str == "module"`(lexf); 336 lexf = find!`a.str == "module"`(lexf);
344 assert( lexf.front.str == "module", lexf.front.str ); | 337 assert_eq( lexf.front.str, "module" );
345 assert( lexf.front.pos.filename == "polemy/lex.d" ); | 338 assert_eq( lexf.front.pos.filename, "polemy/lex.d" );
346 assert( lexf.front.pos.lineno == 7 ); | 339 assert_eq( lexf.front.pos.lineno, 7 );
347 assert( lexf.front.pos.column == 1 ); | 340 assert_eq( lexf.front.pos.column, 1 );
> 341 lexf.popFront;
> 342 assert_eq( lexf.front.str, "polemy" );
> 343 assert_eq( lexf.front.pos.lineno, 7 );
> 344 assert_eq( lexf.front.pos.column, 8 );
> 345 lexf.popFront;
> 346 assert_eq( lexf.front.str, "." );
348 lexf.popFront; 347 lexf.popFront;
349 assert( lexf.front.str == "polemy" ); | 348 assert_eq( lexf.front.str, "lex" );
350 assert( lexf.front.pos.lineno == 7 ); <
351 assert( lexf.front.pos.column == 8 ); <
352 lexf.popFront; 349 lexf.popFront;
353 assert( lexf.front.str == "." ); | 350 assert_eq( lexf.front.str, ";" );
354 lexf.popFront; 351 lexf.popFront;
355 assert( lexf.front.str == "lex" ); <
356 lexf.popFront; <
357 assert( lexf.front.str == ";" ); | 352 assert_eq( lexf.front.str, "import" );
358 lexf.popFront; <
359 assert( lexf.front.str == "import" ); <
360 assert( lexf.front.pos.lineno == 8 ); | 353 assert_eq( lexf.front.pos.lineno, 8 );
361 assert( lexf.front.pos.column == 1 ); | 354 assert_eq( lexf.front.pos.column, 1 );
362 } 355 }
363 356
364 unittest 357 unittest
365 { 358 {
366 auto lex = lexerFromString(`my # comment should | 359 auto lex = lexerFromString(`my # comment should`~"\r\n"~`# hey!!
367 # hey!! <
368 be ignored. 360 be ignored.
369 hahaha"hihihi""hu\\\"huhu"#123 aa 361 hahaha"hihihi""hu\\\"huhu"#123 aa
370 123 aa "aaa | 362 123 aa "aaa`~"\r\n"~`bbb # 123`~"\r\n"~`eee"
371 bbb # 123 <
372 eee" <
373 zzz 363 zzz
374 `); 364 `);
375 Token[] ts = std.array.array(lex); 365 Token[] ts = std.array.array(lex);
376 assert( ts[0].str == "my" ); | 366 assert_eq( ts[0].str, "my" );
377 assert( ts[0].pos.lineno == 1 ); | 367 assert_eq( ts[0].pos.lineno, 1 );
> 368 assert( !ts[0].quoted );
378 assert( ts[1].str == "be" ); | 369 assert_eq( ts[1].str, "be" );
379 assert( ts[1].pos.lineno == 3 ); | 370 assert_eq( ts[1].pos.lineno, 3 );
> 371 assert( !ts[1].quoted );
380 assert( ts[2].str == "ignored" ); | 372 assert_eq( ts[2].str, "ignored" );
> 373 assert( !ts[2].quoted );
381 assert( ts[3].str == "." ); | 374 assert_eq( ts[3].str, "." );
> 375 assert( !ts[3].quoted );
382 assert( ts[4].str == "hahaha" ); | 376 assert_eq( ts[4].str, "hahaha" );
383 assert( ts[4].pos.lineno == 4 ); | 377 assert_eq( ts[4].pos.lineno, 4 );
384 assert( ts[4].kind == Token.Kind.identifier ); <
> 378 assert( !ts[4].quoted );
385 assert( ts[5].str == "hihihi" ); | 379 assert_eq( ts[5].str, "hihihi" );
386 assert( ts[5].pos.lineno == 4 ); | 380 assert_eq( ts[5].pos.lineno, 4 );
387 assert( ts[5].kind == Token.Kind.stringLiteral ); <
> 381 assert( ts[5].quoted );
388 assert( ts[6].str == `hu\"huhu` ); | 382 assert_eq( ts[6].str, `hu\"huhu` );
389 assert( ts[6].kind == Token.Kind.stringLiteral ); <
390 assert( ts[6].pos.lineno == 4 ); | 383 assert_eq( ts[6].pos.lineno, 4 );
> 384 assert( ts[6].quoted );
391 assert( ts[7].str == "123" ); | 385 assert_eq( ts[7].str, "123" );
392 assert( ts[7].pos.lineno == 5 ); | 386 assert_eq( ts[7].pos.lineno, 5 );
393 assert( ts[7].kind == Token.Kind.number ); <
394 assert( ts[8].str == "aa" ); | 387 assert_eq( ts[8].str, "aa" );
395 assert( ts[9].pos.lineno == 5 ); | 388 assert_eq( ts[9].pos.lineno, 5 );
396 assert( ts[9].str == "aaa\nbbb # 123\neee" ); | 389 assert_eq( ts[9].str, "aaa\nbbb # 123\neee" );
397 assert( ts[9].kind == Token.Kind.stringLiteral ); <
> 390 assert( ts[9].quoted );
398 assert( ts[10].pos.lineno == 8 ); | 391 assert_eq( ts[10].pos.lineno, 8 );
> 392 assert( !ts[10].quoted );
399 assert( ts.length == 11 ); | 393 assert_eq( ts.length, 11 );
400 } 394 }