Differences From Artifact [783ee3b0fe58558e]:
- File
polemy/lex.d
- 2010-11-08 06:19:57 - part of checkin [61998c472a] on branch trunk - Introduced unittest helpers (assert_eq, assert_throw, etc). Migration to it is not done yet. (user: kinaba) [annotate]
To Artifact [0972f7a454ea8e4f]:
- File
polemy/lex.d
- 2010-11-08 08:45:51 - part of checkin [8d297342aa] on branch trunk - Replaced Token.Kind with bool quoted (user: kinaba) [annotate]
44 44 assert( !__traits(compiles, p.column =222) );
45 45 }
46 46
47 47 /// Represents a lexer token
48 48
49 49 class Token
50 50 {
51 - /// currently we have three kinds of token
52 - enum Kind {
53 - identifier, /// anything other than others
54 - stringLiteral, /// "string literal"
55 - number /// 42
56 - };
57 - immutable LexPosition pos; /// position where the token occurred in the source
58 - immutable string str; /// the token string itself
59 - immutable Kind kind; /// which kind of token?
51 + immutable LexPosition pos; /// Position where the token occurred in the source
52 + immutable string str; /// The token string itself
53 + immutable bool quoted; /// Was it a "quoted" token or unquoted?
60 54
61 55 mixin SimpleConstructor;
62 56 mixin SimpleCompare;
63 57 }
64 58
65 59 unittest
66 60 {
67 61 auto p = new immutable(LexPosition)("hello.cpp", 123, 45);
68 - auto t = new Token(p, "class", Token.Kind.identifier);
62 + auto t = new Token(p, "class", false);
63 + auto u = new Token(p, "class", true);
69 64
70 65 assert_eq( t.pos, p );
71 66 assert_eq( t.str, "class" );
72 - assert_eq( t, new Token(p, "class", Token.Kind.identifier) );
73 - assert_lt( t, new Token(p, "struct", Token.Kind.identifier) );
67 + assert( !t.quoted );
68 + assert_eq( t, new Token(p, "class", false) );
69 + assert_lt( t, new Token(p, "struct", false) );
70 + assert_ne( t, u );
71 + assert( u.quoted );
74 72
75 73 assert( !__traits(compiles, new Token) );
76 74 assert( !__traits(compiles, t.pos=p) );
77 75 assert( !__traits(compiles, t.str=789) );
76 + assert( !__traits(compiles, t.quoted=true) );
78 77 }
79 78
80 79 /// Named Constructor for Lexer
81 80
82 81 Lexer lexerFromFile(T...)( string filename, T rest )
83 82 {
84 83 return lexerFromString( std.file.readText(filename), filename, rest );
................................................................................
113 112 std.exception.enforce(current, "Lexer has already reached the end");
114 113 current = readNext();
115 114 }
116 115
117 116 /// Range primitive
118 117 Lexer save() /*@property*/
119 118 {
120 - return new Lexer(buffer, filename, lineno, column, current);
119 + return new Lexer(this.tupleof);
121 120 }
122 121
123 122 private: // implementation
124 123
125 124 string buffer;
126 125 string filename;
127 126 int lineno;
................................................................................
227 226 column = 1;
228 227 }
229 228 else
230 229 lit ~= c;
231 230 }
232 231 if( !buffer.empty )
233 232 readChar();
234 - return new Token(pos, lit, Token.Kind.stringLiteral);
233 + return new Token(pos, lit, true);
235 234 }
236 235 else
237 236 {
238 237 // normal symbol
239 238 auto pos = currentPosition();
240 239 auto str = ""~readChar();
241 - return new Token(pos, str, Token.Kind.identifier);
240 + return new Token(pos, str, false);
242 241 }
243 242 }
244 243 else
245 244 {
246 245 auto pos = currentPosition();
247 246 int i = 0;
248 247 while( i<buffer.length && !std.ctype.isspace(buffer[i]) && !isSymbol(buffer[i]) )
249 248 ++i;
250 249 auto str = buffer[0 .. i];
251 250 buffer = buffer[i .. $];
252 251 column += i;
253 - bool isNumber = find!(`a<'0' || '9'<a`)(str).empty;
254 - return new Token(pos, str, isNumber ? Token.Kind.number : Token.Kind.identifier);
252 + return new Token(pos, str, false);
255 253 }
256 254 }
257 255
258 256 bool isSymbol(char c)
259 257 {
260 258 return (0x21<=c && c<=0x7f && !std.ctype.isalnum(c) && c!='_');
261 259 }
................................................................................
269 267 unittest
270 268 {
271 269 assert( std.range.isForwardRange!(Lexer) );
272 270 }
273 271
274 272 unittest
275 273 {
276 - auto lex = lexerFromString("this is a \t\n pen :-( ");
274 + auto lex = lexerFromString("this is a \t\r\n pen :-( ");
277 275 Token[] ts = std.array.array(lex);
278 276
279 - assert( ts[0].pos.lineno == 1 );
280 - assert( ts[0].pos.column == 1 );
281 - assert( ts[0].kind == Token.Kind.identifier );
282 - assert( ts[0].str == "this" );
277 + assert_eq( ts[0].pos.lineno, 1 );
278 + assert_eq( ts[0].pos.column, 1 );
279 + assert( !ts[0].quoted );
280 + assert_eq( ts[0].str, "this" );
281 +
282 + assert_eq( ts[1].pos.lineno, 1 );
283 + assert_eq( ts[1].pos.column, 6 );
284 + assert( !ts[1].quoted );
285 + assert_eq( ts[1].str, "is" );
283 286
284 - assert( ts[1].pos.lineno == 1 );
285 - assert( ts[1].pos.column == 6 );
286 - assert( ts[1].kind == Token.Kind.identifier );
287 - assert( ts[1].str == "is" );
287 + assert_eq( ts[2].pos.lineno, 1 );
288 + assert_eq( ts[2].pos.column, 9 );
289 + assert( !ts[2].quoted );
290 + assert_eq( ts[2].str, "a" );
288 291
289 - assert( ts[2].pos.lineno == 1 );
290 - assert( ts[2].pos.column == 9 );
291 - assert( ts[2].kind == Token.Kind.identifier );
292 - assert( ts[2].str == "a" );
293 -
294 - assert( ts[3].pos.lineno == 2 );
295 - assert( ts[3].pos.column == 2 );
296 - assert( ts[3].kind == Token.Kind.identifier );
297 - assert( ts[3].str == "pen" );
292 + assert_eq( ts[3].pos.lineno, 2 );
293 + assert_eq( ts[3].pos.column, 2 );
294 + assert( !ts[3].quoted );
295 + assert_eq( ts[3].str, "pen" );
298 296
299 297 // consecutive symbols are always separated
300 298 // hence, no "++" or "<<" or ...
301 299
302 - assert( ts[4].pos.lineno == 2 );
303 - assert( ts[4].pos.column == 6 );
304 - assert( ts[4].str == ":" );
300 + assert_eq( ts[4].pos.lineno, 2 );
301 + assert_eq( ts[4].pos.column, 6 );
302 + assert_eq( ts[4].str, ":" );
303 +
304 + assert_eq( ts[5].pos.lineno, 2 );
305 + assert_eq( ts[5].pos.column, 7 );
306 + assert_eq( ts[5].str, "-" );
305 307
306 - assert( ts[5].pos.lineno == 2 );
307 - assert( ts[5].pos.column == 7 );
308 - assert( ts[5].str == "-" );
308 + assert_eq( ts[6].pos.lineno, 2 );
309 + assert_eq( ts[6].pos.column, 8 );
310 + assert_eq( ts[6].str, "(" );
309 311
310 - assert( ts[6].pos.lineno == 2 );
311 - assert( ts[6].pos.column == 8 );
312 - assert( ts[6].str == "(" );
313 -
314 - assert( ts.length == 7 );
312 + assert_eq( ts.length, 7 );
315 313 }
316 314
317 315 unittest
318 316 {
319 317 auto lex2 = lexerFromString(" a12\n3a 5 ");
320 - assert( lex2.front.str == "a12" );
321 - assert( lex2.front.kind == Token.Kind.identifier );
318 + assert_eq( lex2.front.str, "a12" );
322 319 lex2.popFront;
323 320 auto lex3 = lex2.save;
324 - assert( lex2.front.str == "3a" );
325 - assert( lex2.front.kind == Token.Kind.identifier );
321 + assert_eq( lex2.front.str, "3a" );
326 322 lex2.popFront;
327 - assert( lex3.front.str == "3a" );
328 - assert( lex3.front.kind == Token.Kind.identifier );
329 - assert( lex2.front.str == "5" );
330 - assert( lex2.front.kind == Token.Kind.number );
323 + assert_eq( lex3.front.str, "3a" );
324 + assert_eq( lex2.front.str, "5" );
331 325 lex2.popFront;
332 326 lex3.popFront;
333 327 assert( lex2.empty );
334 328 assert( !lex3.empty );
335 - assert( lex3.front.str == "5" );
336 - assert( lex3.front.kind == Token.Kind.number );
329 + assert_eq( lex3.front.str, "5" );
337 330 }
338 331
339 332 unittest
340 333 {
341 334 //!! be sure to run the unittest on the root of the source directory
342 335 auto lexf = lexerFromFile("polemy/lex.d");
343 336 lexf = find!`a.str == "module"`(lexf);
344 - assert( lexf.front.str == "module", lexf.front.str );
345 - assert( lexf.front.pos.filename == "polemy/lex.d" );
346 - assert( lexf.front.pos.lineno == 7 );
347 - assert( lexf.front.pos.column == 1 );
337 + assert_eq( lexf.front.str, "module" );
338 + assert_eq( lexf.front.pos.filename, "polemy/lex.d" );
339 + assert_eq( lexf.front.pos.lineno, 7 );
340 + assert_eq( lexf.front.pos.column, 1 );
341 + lexf.popFront;
342 + assert_eq( lexf.front.str, "polemy" );
343 + assert_eq( lexf.front.pos.lineno, 7 );
344 + assert_eq( lexf.front.pos.column, 8 );
345 + lexf.popFront;
346 + assert_eq( lexf.front.str, "." );
348 347 lexf.popFront;
349 - assert( lexf.front.str == "polemy" );
350 - assert( lexf.front.pos.lineno == 7 );
351 - assert( lexf.front.pos.column == 8 );
348 + assert_eq( lexf.front.str, "lex" );
352 349 lexf.popFront;
353 - assert( lexf.front.str == "." );
350 + assert_eq( lexf.front.str, ";" );
354 351 lexf.popFront;
355 - assert( lexf.front.str == "lex" );
356 - lexf.popFront;
357 - assert( lexf.front.str == ";" );
358 - lexf.popFront;
359 - assert( lexf.front.str == "import" );
360 - assert( lexf.front.pos.lineno == 8 );
361 - assert( lexf.front.pos.column == 1 );
352 + assert_eq( lexf.front.str, "import" );
353 + assert_eq( lexf.front.pos.lineno, 8 );
354 + assert_eq( lexf.front.pos.column, 1 );
362 355 }
363 356
364 357 unittest
365 358 {
366 - auto lex = lexerFromString(`my # comment should
367 -# hey!!
359 + auto lex = lexerFromString(`my # comment should`~"\r\n"~`# hey!!
368 360 be ignored.
369 361 hahaha"hihihi""hu\\\"huhu"#123 aa
370 -123 aa "aaa
371 -bbb # 123
372 -eee"
362 +123 aa "aaa`~"\r\n"~`bbb # 123`~"\r\n"~`eee"
373 363 zzz
374 364 `);
375 365 Token[] ts = std.array.array(lex);
376 - assert( ts[0].str == "my" );
377 - assert( ts[0].pos.lineno == 1 );
378 - assert( ts[1].str == "be" );
379 - assert( ts[1].pos.lineno == 3 );
380 - assert( ts[2].str == "ignored" );
381 - assert( ts[3].str == "." );
382 - assert( ts[4].str == "hahaha" );
383 - assert( ts[4].pos.lineno == 4 );
384 - assert( ts[4].kind == Token.Kind.identifier );
385 - assert( ts[5].str == "hihihi" );
386 - assert( ts[5].pos.lineno == 4 );
387 - assert( ts[5].kind == Token.Kind.stringLiteral );
388 - assert( ts[6].str == `hu\"huhu` );
389 - assert( ts[6].kind == Token.Kind.stringLiteral );
390 - assert( ts[6].pos.lineno == 4 );
391 - assert( ts[7].str == "123" );
392 - assert( ts[7].pos.lineno == 5 );
393 - assert( ts[7].kind == Token.Kind.number );
394 - assert( ts[8].str == "aa" );
395 - assert( ts[9].pos.lineno == 5 );
396 - assert( ts[9].str == "aaa\nbbb # 123\neee" );
397 - assert( ts[9].kind == Token.Kind.stringLiteral );
398 - assert( ts[10].pos.lineno == 8 );
399 - assert( ts.length == 11 );
366 + assert_eq( ts[0].str, "my" );
367 + assert_eq( ts[0].pos.lineno, 1 );
368 + assert( !ts[0].quoted );
369 + assert_eq( ts[1].str, "be" );
370 + assert_eq( ts[1].pos.lineno, 3 );
371 + assert( !ts[1].quoted );
372 + assert_eq( ts[2].str, "ignored" );
373 + assert( !ts[2].quoted );
374 + assert_eq( ts[3].str, "." );
375 + assert( !ts[3].quoted );
376 + assert_eq( ts[4].str, "hahaha" );
377 + assert_eq( ts[4].pos.lineno, 4 );
378 + assert( !ts[4].quoted );
379 + assert_eq( ts[5].str, "hihihi" );
380 + assert_eq( ts[5].pos.lineno, 4 );
381 + assert( ts[5].quoted );
382 + assert_eq( ts[6].str, `hu\"huhu` );
383 + assert_eq( ts[6].pos.lineno, 4 );
384 + assert( ts[6].quoted );
385 + assert_eq( ts[7].str, "123" );
386 + assert_eq( ts[7].pos.lineno, 5 );
387 + assert_eq( ts[8].str, "aa" );
388 + assert_eq( ts[9].pos.lineno, 5 );
389 + assert_eq( ts[9].str, "aaa\nbbb # 123\neee" );
390 + assert( ts[9].quoted );
391 + assert_eq( ts[10].pos.lineno, 8 );
392 + assert( !ts[10].quoted );
393 + assert_eq( ts.length, 11 );
400 394 }