BNF -> parser assembler 2005-04-12


Tonight I set myself the goal of preparing an initial version of a BNF grammar, with some extensions, as the starting point for a tool that converts BNF into assembly for my parser assembler (I need to think of a proper name for it soon...). Below are the BNF and some snippets showing how I'm bootstrapping it. The hand-converted grammar now parses the whole BNF for itself. Here's my BNF grammar:

    
    ; %triggers defines the list of symbolic names for the triggers that the VM will call.
    ; Each entry binds one name to one trigger number. EXP was listed twice
    ; (5 and 15); only the binding that continues the 1..14 sequence is kept, so
    ; /EXP/ refers to exactly one trigger.
    %triggers {
       LP = 1 .
       RP = 2 .
       CUT = 3 .
       CALL = 4 .
       EXP = 5 .
       TDEF = 6 .
       PRODL = 7 .
       PRODR = 8 .
       RULE = 9 .
       SUBL = 10 .
       SUBR = 11 .
       ORL = 12 .
       ORR = 13 .
       STORE = 14 .
    }
    
    ; '!' represents my "cut" operator. It breaks the VM with the string argument
    ; as an error if the remaining part of the rule fails
    
    ; bnf: start rule - an optional %triggers header, any number of productions,
    ; then end of input. The leading ws* lets a file open with blank lines or a
    ; ';' comment (as this grammar does); the hand translation does the same with
    ; its initial "kln $ws".
    bnf          ::= ws* triggers? production* !"EOF Expected" EOF .
    ; triggers: the "%triggers { ... }" header, holding zero or more tdef entries.
    triggers     ::= "%triggers" ws* "{" ws* tdef* "}" ws* .
    ; tdef: one "name = number ." entry; the TDEF trigger is invoked once the
    ; whole entry (including trailing whitespace) has been matched.
    tdef         ::= name ws* "=" ws* number ws* "." ws* /TDEF/ .
    ; production: "name ::= rule ... ." - PRODL is invoked right after the name,
    ; PRODR after the closing "." and any trailing whitespace.
    production   ::= name /PRODL/ ws* "::=" (ws* rule)* ws* "." ws* /PRODR/ .
    ; rule: a single sub_expr followed by the RULE trigger.
    rule         ::= sub_expr /RULE/ .
    
    ; Operator rules. Each one matches its left operand first and then, optionally,
    ; the operator followed by the same rule again, so chains nest to the right.
    ; sub_expr: or_expr, optionally "-" and another sub_expr (SUBL before, SUBR after
    ; the right operand). "-" is the operator used in "(ANY - #xA)" below.
    sub_expr     ::= or_expr ws* ("-" ws* /SUBL/ sub_expr /SUBR/)? .
    ; or_expr: store_expr, optionally "|" and another or_expr (ORL / ORR triggers).
    or_expr      ::= store_expr ws* ("|" ws* /ORL/ or_expr /ORR/)? .
    ; store_expr: post_expr, optionally "->" and a const (STORE trigger).
    store_expr   ::= post_expr ws* ("->" ws* const /STORE/ )? .
    ; post_expr: a cut, a call, or a primary_expr with an optional '?', '*' or '+'
    ; suffix; EXP is invoked only for the primary_expr alternative.
    post_expr    ::= cut
                   | call
                   | (primary_expr ws* (('?' | '*' | '+') -> #7)? /EXP/) .
    
    ; primary_expr: the operand forms - a parenthesised group, a keyword, a
    ; production name, a string literal, a "#" constant or a character set.
    primary_expr ::= paren_expr | keywords | name
                   | string | const | set .
    
    ; paren_expr: "(" one or more rules ")". LP is invoked after "(" and RP after ")".
    ; Each cut supplies the error reported if the remainder of the rule fails.
    paren_expr   ::= "(" /LP/ ws*
                     !"Expected at least one rule inside parentheses"
                     rule+ ws*
                     !"Expected )"
                     ")" /RP/ .
    
    ; cut: "!" immediately followed by the error-message string (CUT trigger).
    cut          ::= "!" -> #7 string /CUT/.
    ; call: a trigger reference, "/" number-or-name "/" (CALL trigger).
    call         ::= "/" -> #7 (number|name) "/" /CALL/.
    ; const: "#" followed by a number, e.g. #9 or #xA.
    const        ::= "#" number .
    
    ; --- "Tokens"
    ; string: a double- or single-quoted literal. The body is any run of characters
    ; other than the closing quote, hence the negated sets [~"] and [~'] (a plain
    ; ["] would match only quote characters, so no non-empty string could parse).
    string       ::= ('"' [~"]* -> #6 '"') | ("'" [~']* -> #6 "'") .
    ; keywords: the reserved words ANY and EOF.
    keywords     ::= ("ANY" | "EOF") -> #1 .
    ; name: a letter followed by letters, digits, '_' or '-'.
    name         ::= ([a-zA-Z][a-zA-Z0-9_\-]*) -> #2 .
    ; number: decimal, or hexadecimal written with an 'x' prefix.
    number       ::= (base10 | base16) .
    ; base10: one or more decimal digits.
    base10       ::= [0-9]+ -> #3 .
    ; base16: 'x' followed by one or more hex digits (either case).
    base16       ::= 'x' [0-9a-fA-F]+ -> #4 .
    ; set: "[" optional '~' prefix, then every character up to the closing "]".
    ; Uses the ANY keyword (upper case, as in ws); lower-case "any" would be read
    ; as a reference to a production named "any", which this grammar does not define.
    set          ::= '[' ('~'? (ANY - ']')*) -> #5 ']' .
    ; ws: one whitespace item - a space, one of the constants #9, #xD or #xA
    ; (presumably the tab, CR and LF character codes), or a ';' comment that runs
    ; up to and including the next #xA.
    ws           ::= ' ' | #9 | #xD | #xA  | ';' (ANY - #xA)* #xA .

Here is an excerpt of the hand translation into parser assembler. I've tried to make it match, reasonably well but not exactly, what I expect the BNF tool to generate:

    :bnf
        kln $ws
        jsr $triggers
        kln $production
        cut "EOF Expected"
        eof
        ret
    
    :triggers
        req "%triggers"
        kln $ws
        req "{"
        kln $ws
        kln $tdef
        req "}"
        kln $ws
        ret
    
    :tdef
        req $name
        kln $ws
        req "="
        kln $ws
        req $number
        kln $ws
        req "."
        kln $ws
        trg #6
        ret
    
    :production
        req $name
        trg #7
        kln $ws
        req "::="
        kln $production_1
        kln $ws
        req "."
        kln $ws
        trg #8
        ret
    
    :production_1
        kln $ws
        req $rule
        ret
    
    :rule
        req $sub_expr
        trg #9
        ret
    
    :paren_expr
        req "("
        trg #1
        kln $ws
        cut "Expected at least one rule"
        req $rule
        kln $rule
        kln $ws
        cut "Expected ')'"
        req ")"
        trg #2
        ret
    
    :sub_expr
        req $or_expr
        kln $ws
        cmp #'-'
        bne $sub_expr_1
        eat
        cut "Expected expression"
        kln $ws
        trg #10
        req $sub_expr
        trg #11
    :sub_expr_1
        ret

Don't have much time for explanations right now, but I'd be happy to answer questions.

blog comments powered by Disqus