a) What is the grammar that this parser implements?
b) What is the language that it defines?
(Some helpful hints: It only understands the tokens, or terminals, a, b and c. Finish the input with EOF, which in Linux is entered using CTRL-D on a separate line. All other input, such as d, is ignored. You can compile and run the program if you want, but you don't have to.)
// A simple parser: parser-1.c
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
int lookahead;
void error(void) {
printf("Syntax error: unexpected token '%c'\n", lookahead);
exit(EXIT_FAILURE);
}
// A simple scanner.
// Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored.
int scan(void) {
int input;
while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF)
;
return input;
}
void match(int expected) {
if (lookahead == expected)
lookahead = scan();
else
error();
}
// These are the non-terminals, calling each other recursively
void S(void), T(void), U(void);
void S(void) {
T();
}
void T(void) {
match('a'); match('b'); U();
}
void U(void) {
match('c');
}
int main(void) {
printf("Parser 1. Enter input. End with EOF (CTRL-D on Linux).\n");
lookahead = scan(); // To get started, so we have somthing to compare to
S();
if (lookahead != EOF)
error();
printf("Done!\n");
return EXIT_SUCCESS;
}
a) What is the grammar that this parser implements?
b) What is the language that it defines?
// A simple parser: parser-2.c
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
int lookahead;
void error(void) {
printf("Syntax error: unexpected token '%c'\n", lookahead);
exit(EXIT_FAILURE);
}
// A simple scanner.
// Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored.
int scan(void) {
int input;
while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF)
;
return input;
}
void match(int expected) {
if (lookahead == expected)
lookahead = scan();
else
error();
}
// These are the non-terminals, calling each other recursively
void S(void), T(void), U(void);
void S(void) {
T(); T(); U();
}
void T(void) {
match('a'); U();
}
void U(void) {
match('b'); match('c');
}
int main(void) {
printf("Parser 2. Enter input. End with EOF (CTRL-D on Linux).\n");
lookahead = scan(); // To get started, so we have somthing to compare to
S();
if (lookahead != EOF)
error();
printf("Done!\n");
return EXIT_SUCCESS;
}
a) What is the grammar that this parser implements?
b) What is the language that it defines?
// A simple parser: parser-3.c
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
int lookahead;
void error(void) {
printf("Syntax error: unexpected token '%c'\n", lookahead);
exit(EXIT_FAILURE);
}
// A simple scanner.
// Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored.
int scan(void) {
int input;
while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF)
;
return input;
}
void match(int expected) {
if (lookahead == expected)
lookahead = scan();
else
error();
}
// These are the non-terminals, calling each other recursively
void S(void), T(void), U(void), V(void);
void S(void) {
if (lookahead == 'a') {
match('a'); T();
}
else if (lookahead == 'b') {
match('b'); T();
}
else {
error();
}
}
void T(void) {
if (lookahead == 'a') {
U(); match('c');
}
else if (lookahead == 'b') {
V(); match('c');
}
else {
error();
}
}
void U(void) {
match('a'); match('b');
}
void V(void) {
match('b'); match('b');
}
int main(void) {
printf("Parser 3. Enter input. End with EOF (CTRL-D on Linux).\n");
lookahead = scan(); // To get started, so we have somthing to compare to
S();
if (lookahead != EOF)
error();
printf("Done!\n");
return EXIT_SUCCESS;
}
a) What is the grammar that this parser implements?
b) What is the language that it defines?
// A simple parser: parser-4.c
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
int lookahead;
void error(void) {
printf("Syntax error: unexpected token '%c'\n", lookahead);
exit(EXIT_FAILURE);
}
// A simple scanner.
// Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored.
int scan(void) {
int input;
while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF)
;
return input;
}
void match(int expected) {
if (lookahead == expected)
lookahead = scan();
else
error();
}
// These are the non-terminals, calling each other recursively
void S(void), T(void);
void S(void) {
if (lookahead == 'a') {
match('a'); T(); S();
}
else if (lookahead == 'b') {
match('b');
}
else {
error();
}
}
void T(void) {
match('a'); match('b'); match('c');
}
int main(void) {
printf("Parser 4. Enter input. End with EOF (CTRL-D on Linux).\n");
lookahead = scan(); // To get started, so we have somthing to compare to
S();
if (lookahead != EOF)
error();
printf("Done!\n");
return EXIT_SUCCESS;
}
a) Write a grammar for this language.
b) Write a parser that implements this grammar.
The terminal variable is a variable name, and consists of one or more lower-case letters. The terminal integer-literal is an integer literal, and consists of one or more normal decimal digits. Empty stands for an empty production. The start symbol is statement-list.statement-list -> statement statement-list | empty statement -> variable = integer-literal ; condition -> variable < variable statement -> if ( condition ) statement
As an example of valid input, we saw this:
if (hjalmar < hulda) if (z < x) w = 33; kalle = 17;a) Write some more examples of valid input.
b) Write some examples of invalid input.
c) Write a parser for the language. Verify that the inputs in a and b are handled as expected. Below is a program skeleton with a scanner and some definitions, so you only have to add the actual parsing part.
// A parser for a subset of C: parser-6.c
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
// Single-character tokens are reprseted by their character code.
// These are the other token codes.
enum token_codes {
VARIABLE = 1000,
INTEGER_LITERAL = 1001,
IF = 1002
};
int lookahead;
void error(void) {
printf("[Syntax error: unexpected token '%c' (code %d)]\n",
(isprint(lookahead) ? lookahead : '?'), lookahead);
exit(EXIT_FAILURE);
}
// A scanner for a subset of C.
// Returns one of the tokens '(', ')', ';', '<' and ';',
// or one of the tokens VARIABLE, INTEGER_LITERAL, IF, or EOF.
// Whitespace is ignored.
// Any other character is an error.
int scan(void) {
while(1) {
int c = getchar();
if (c == EOF)
return EOF;
else if (isspace(c))
; // Just skip whitespace
else if (isdigit(c)) {
// This is the start of an integer literal
// Read it, but ignore the value
while (isdigit(c)) {
c = getchar();
}
if (c != EOF)
ungetc(c, stdin);
return INTEGER_LITERAL;
}
else if (isalpha(c)) {
// This is the start of a keyword or variable name
#define MAX_ID_LENGTH 100
char buffer[MAX_ID_LENGTH + 1];
int n = 0;
while (isalpha(c)) {
buffer[n++] = c;
if (n > MAX_ID_LENGTH) {
printf("[Error: Identifier too long]\n");
exit(EXIT_FAILURE);
}
c = getchar();
}
buffer[n] = '\0';
if (c != EOF)
ungetc(c, stdin);
if (strcmp(buffer, "if") == 0)
return IF;
else
return VARIABLE;
}
else if (strchr("();<=", c) != NULL)
return c;
else {
printf("[Error: Unknown character: '%c']\n", c);
exit(EXIT_FAILURE);
}
} // while
} // scan
void match(int expected) {
if (lookahead == expected)
lookahead = scan();
else
error();
}
// These are the non-terminals, calling each other recursively
void statement_list(void), statement(void), condition(void);
// Write the functions statement_list, statement and condition!
int main(void) {
printf("Parser 6. Enter input. End with EOF (CTRL-D on Linux).\n");
lookahead = scan(); // To get started, so we have somthing to compare to
statement_list();
if (lookahead != EOF)
error();
printf("Done!\n");
return EXIT_SUCCESS;
}
There are some solutions to some of the questions, but try to solve them yourself first.