S -> T T -> a b U U -> c
b) Language:
a b c
S -> T T U T -> a U U -> b c
b) Language:
a b c a b c b c
S -> a T | b T T -> U c | V c U -> a b V -> b b
b) Language:
a a b c a b b c b a b c b b b c
S -> a T S | b T -> a b c
b) Language:
b aabcb aabcaabcb aabcaabcaabcb ...With a regular expression: (aabc)*b
S -> a b S | b a
With a regular expression: (ab)*ba
b) Parser:
// A simple parser: parser-5.c
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
int lookahead;
void error(void) {
printf("Syntax error: unexpected token '%c'\n", lookahead);
exit(EXIT_FAILURE);
}
// A simple scanner.
// Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored.
int scan(void) {
int input;
while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF)
;
return input;
}
void match(int expected) {
if (lookahead == expected)
lookahead = scan();
else
error();
}
// These are the non-terminals, calling each other recursively
void S(void);
void S(void) {
if (lookahead == 'a') {
match('a'); match('b'); S();
}
else if (lookahead == 'b') {
match('b'); match('a');
}
else {
error();
}
}
int main(void) {
printf("Parser 5. Enter input. End with EOF (CTRL-D on Linux).\n");
lookahead = scan(); // To get started, so we have somthing to compare to
S();
if (lookahead != EOF)
error();
printf("Done!\n");
return EXIT_SUCCESS;
}
b) Some examples of invalid input:
c) Parser:
// A parser for a subset of C: parser-6.c
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
// Single-character tokens are reprseted by their character code.
// These are the other token codes.
enum token_codes {
VARIABLE = 1000,
INTEGER_LITERAL = 1001,
IF = 1002
};
int lookahead;
void error(void) {
printf("[Syntax error: unexpected token '%c' (code %d)]\n",
(isprint(lookahead) ? lookahead : '?'), lookahead);
exit(EXIT_FAILURE);
}
// A scanner for a subset of C.
// Returns one of the tokens '(', ')', ';', '<' and ';',
// or one of the tokens VARIABLE, INTEGER_LITERAL, IF, or EOF.
// Whitespace is ignored.
// Any other character is an error.
int scan(void) {
while(1) {
int c = getchar();
if (c == EOF)
return EOF;
else if (isspace(c))
; // Just skip whitespace
else if (isdigit(c)) {
// This is the start of an integer literal
// Read it, but ignore the value
while (isdigit(c)) {
c = getchar();
}
if (c != EOF)
ungetc(c, stdin);
return INTEGER_LITERAL;
}
else if (isalpha(c)) {
// This is the start of a keyword or variable name
#define MAX_ID_LENGTH 100
char buffer[MAX_ID_LENGTH + 1];
int n = 0;
while (isalpha(c)) {
buffer[n++] = c;
if (n > MAX_ID_LENGTH) {
printf("[Error: Identifier too long]\n");
exit(EXIT_FAILURE);
}
c = getchar();
}
buffer[n] = '\0';
if (c != EOF)
ungetc(c, stdin);
if (strcmp(buffer, "if") == 0)
return IF;
else
return VARIABLE;
}
else if (strchr("();<=", c) != NULL)
return c;
else {
printf("[Error: Unknown character: '%c']\n", c);
exit(EXIT_FAILURE);
}
} // while
} // scan
void match(int expected) {
if (lookahead == expected)
lookahead = scan();
else
error();
}
// These are the non-terminals, calling each other recursively
void statement_list(void), statement(void), condition(void);
void statement_list(void) {
if (lookahead == VARIABLE || lookahead == IF) {
statement(); statement_list();
}
else {
// Anything is ok here, since an empty sequence of tokens will fit anywhere
}
} // statement_list
void statement(void) {
if (lookahead == VARIABLE) {
match(VARIABLE); match('='); match(INTEGER_LITERAL); match(';');
}
else if (lookahead == IF) {
match(IF); match('('); condition(); match(')'); statement();
}
else {
error();
}
} // statement
void condition(void) {
match(VARIABLE); match('<'); match(VARIABLE);
}
int main(void) {
printf("Parser 6. Enter input. End with EOF (CTRL-D on Linux).\n");
lookahead = scan(); // To get started, so we have somthing to compare to
statement_list();
if (lookahead != EOF)
error();
printf("Done!\n");
return EXIT_SUCCESS;
}