An Ontology for Representing Grammars

Dr Ph. MARTIN

This document mixes HTML and FL (a KRL, i.e., a Knowledge Representation Language; it is here used in the preformatted text parts). It is an input+backup+presentation file for the Multi-Source Ontology (MSO). The MSO  i) aligns and extends several ontologies, and  ii) can be extended by Web users via the shared KB server WebKB-2 and its shared KB (knowledge base) editing protocols.
In this document, "/^" is used instead of "<" and "\." is used instead of ">".

The ontology in this document is original and still under construction.
It is part of complementary "ontologies on formal languages".


Table of contents
1.  General ontology with values for some grammars
2.  Bison grammar for a generic parser of KRLs



1.  General ontology with values for some grammars

This ontology covers the main BNF variants, e.g., EBNF of the W3C and
XBNF (additional modules/documentations: MATHS, samples, discussion).
See also this article that lists some grammar/BNF related notions.


//Root of the notions declared in this section. Its direct subtypes are declared as
//exclusive ("excl"): the statements of a grammar versus the individual terms of a grammar.
//  - Statement_of_a_grammar is related by "=" to the exclusive set made of the two kinds
//    of rules (non-lexical, lexical) and has Head_grammar-rule as subtype;
//  - Individual_gTerm_of_a_grammar is partitioned into non-lexical and lexical terms.
//NOTE(review): the exact semantics of "excl", "partition" and "=" are those of FL and
//  are not defined in this file -- the above wording is presumed, confirm with the FL manual.
Grammar_element
  \. excl{ (Statement_of_a_grammar   //in this document "\." is used instead of ">" 
              = excl{ Non-lexical-grammar_rule  Lexical-grammar_rule },
              \. Head_grammar-rule )
           (Individual_gTerm_of_a_grammar
              \. partition { Non-lexical-grammar_individual-gTerm
                             Lexical-grammar_individual-gTerm 
                           } )
         };

//A rule of a non-lexical grammar: one left-hand side, one expression and an optional statement.
//Each "rc_" entry below gives the concrete form of such a rule for the listed grammar
//notations; the end-of-line comment shows the resulting notation.
//Fix: the ISO-EBNF entry used to end with ";", which terminated the whole statement and
//  left the Yacc/Bison entry dangling outside of it.
//NOTE(review): "NLG_rule_left-hand-side" is used here whereas the term declared under
//  Non-lexical-grammar_individual-gTerm is "NLG_left-hand-side" -- confirm which is intended.
Non-lexical-grammar_rule = NLG_rule,
  r_part: 1 NLG_rule_left-hand-side  1 NLG_expression  0..1 Statement,
  rc_: (a fc_list_type_(.{W3C-EBNF,ABNF,XBNF,Grammar},"","","")
         r_parts: .[NLG_rule_left-hand-side "::=" NLG_expression] )                   //A::=B
       (a fc_list_type_(.{ISO-EBNF},"","","")
         r_parts: .[NLG_rule_left-hand-side "=" NLG_expression] )                     //A = B
       (a fc_list_type_(.{Yacc, Bison},"","","")
         r_parts: .[NLG_rule_left-hand-side ":" NLG_expression Statement ";"] );      //A : B


//Individual terms of a non-lexical grammar: the left-hand side of a rule, the kinds of
//expressions (or-list, list, minus, delimited, repeated, atomic), the repetition operator
//and the numeral. For each kind of expression, the "rc_" entries give its concrete form in
//the listed grammar notations; the end-of-line comment shows the resulting notation.
//Fixes in this block:
//  - ".{XBNF)" had a mismatched closing bracket (Zero-to-many_NLG-item);
//  - "(fc_list_type_({Grammar},...)" lacked the leading "a" and the "." set prefix used by
//    every other entry (N_NLG-item); same missing "." for NLG_non-start_non-terminal;
//  - "numeral" is now spelled "Numeral", as in the r_part declarations and in the
//    declaration at the end of this block.
//NOTE(review): "Letter" and "Uppercase_letter" are not declared in this file (only "letter"
//  is) -- confirm where they come from.
Non-lexical-grammar_individual-gTerm = NLG_individual-gTerm,
  \. excl
     { (NLG_left-hand-side \. Non-lexical-grammar_non-terminal)
       (NLG_expression
         \. excl
            { (NLG_or-list  r_part: 2..* NLG_expression,
                 rc_: a fc_list_type_(.{Grammar},"","|","")                           //A | B
                      a fc_list_type_(.{Yacc,Bison},"","\n|","") )
              (NLG_list  r_part: 2..* NLG_expression,
                 rc_: a fc_list_type_(.{Grammar},"","","") )                          //A   B
              (NLG_minus  r_part: 2 NLG_expression,
                 rc_: a fc_list_type_(.{W3C-EBNF},"","-","")                          //A - B
                      (a fc_list_type_(.{Grammar},"","-","") r_parts: .[]) )
              (NLG_delimited_expression  r_part: 1 NLG_expression,
                 rc_: a fc_list_type_(.{Grammar},"(","",")") )                        //(A)
              (NLG_repeated_item
                 r_part: 1 NLG_expression  0..1 NLG_repetition_operator  0..1 Numeral,
                 rc_: (Zero-to-one_NLG-item  r_part: 1 NLG_expression,
                         rc_: a fc_list_type_(.{W3C-EBNF},"","","?")                  //A?
                              a fc_list_type_(.{ISO-EBNF,Grammar},"[","","]")         //[A]
                              a fc_list_type_(.{XBNF},"O(","",")")                    //O(A)
                              a fc_list_type_(.{ABNF},"*1","","") )                   //*1A
                     (Zero-to-many_NLG-item  r_part: 1 NLG_expression,
                        rc_: a fc_list_type_(.{W3C-EBNF},"","","*")                   //A*
                             a fc_list_type_(.{ISO-EBNF,Grammar},"{","","}")          //{A}
                             a fc_list_type_(.{XBNF},"#(","",")")                     //#(A)
                             a fc_list_type_(.{ABNF},"*","","") )                     //*A
                     (1-to-many_NLG-item  r_part: 1 NLG_expression,
                        rc_: a fc_list_type_(.{W3C-EBNF},"","","+")                   //A+
                             a fc_list_type_(.{ISO-EBNF,Grammar},"{","","}-")         //{A}-
                             //NOTE(review): the XBNF form below is identical to the
                             //  zero-to-many one; XBNF may use N(A) here -- confirm.
                             a fc_list_type_(.{XBNF},"#(","",")")                     //#(A)
                             a fc_list_type_(.{ABNF},"1*","","") )                    //1*A
                     (N_NLG-item  r_part: 1 NLG_expression  1 Numeral,
                        rc_: (a fc_list_type_(.{ISO-EBNF},"","*","")
                               r_parts: .[Numeral NLG_expression] )                   //N*A
                             (a fc_list_type_(.{Grammar},"","","")  r_parts: .[]) )
                     (N-to-m_NLG-item  r_part: 1 NLG_expression  2 Numeral,
                        rc_: a fc_list_type_(.{ABNF},"",.[Numeral "*" Numeral
                                                         NLG_expression],"" )         //N*M A
                             a fc_list_type_(.{Grammar},"",.[],"") )
              )
              (NLG_atomic-item
                \. excl
                   { (NLG_non-terminal  r_part: 1..* letter,
                       \. excl
                          { (NLG_start_non-terminal
                              r_part:  1 Uppercase_letter  0..* Letter,
                              rc_: (a fc_string_type_(.{W3C-EBNF},"","")
                                     r_part:  1 Uppercase_letter 0..* Letter
                                     r_parts: .[Uppercase_letter Letter] ) )
                            (NLG_non-start_non-terminal
                              rc_: a fc_string_type_(.{Grammar},"","") )
                          }
                     )
                     (NLG_terminal  r_part: 1..* Lexical-grammar_characters,
                       rc_: a fc_string_type_(.{Grammar},"","") )
                   } )
            } )//end of NLG_expression
       NLG_repetition_operator
       Numeral
     };


//A rule of a lexical grammar: one lexical expression and an optional statement.
//The first "rc_" entry (lexical parser grammars) lists the characters followed by a
//statement; the second one (any other grammar) lists the characters only.
//Fix: the statement lacked its terminating ";" and thus ran into the next definition.
//NOTE(review): this block uses "f_list_type_" whereas the other blocks of this file use
//  "fc_list_type_", and "Lexical-grammar_expression" is not declared in this file -- confirm
//  that both are intended.
Lexical-grammar_rule
  r_part: 1 Lexical-grammar_expression  0..1 Statement,
  rc_: (a f_list_type_(.{Lexical_parser_grammar},"","","")
         r_parts: .[Lexical-grammar_characters  Statement] )
       (a f_list_type_(.{Grammar},"","","") r_parts: .[Lexical-grammar_characters]);

//Individual terms of a lexical grammar: characters (quoted strings, character sets and
//exclusions, letters and letter codes), character ranges, spaces and comments.
//The "rc_" entries give the delimiters of the concrete forms, e.g., "[" and "]" for a
//character set, "/*" and "*/" for a fully delimited comment.
//Fixes in this block:
//  - the second range type was also named "letter_range" although it is built from
//    letter-code ranges; it is now "letter-code_range";
//  - ";" was used instead of "," inside that parenthesised term;
//  - "letter-code__range_1" (double underscore) is now "letter-code_range_1", in line
//    with "letter-code_range_2";
//  - a "," was missing between the subtypes of letter_code_with_hexadecimal and its "rc_";
//  - the "r_part" of "comment" was placed after its subtype list; it now precedes "\."
//    as in the other terms of this file.
//NOTE(review): "fully-delimited-comment" is a subtype of both
//  "space_or_fully-delimited-comment" and "comment", and "letter" is reachable from both
//  "Lexical-grammar_characters" and "character", although these are declared within the
//  same "excl" set -- confirm that this is intended.
Lexical-grammar_individual-gTerm
  \. excl
     { (Lexical-grammar_characters
         \. (quoted_string r_part: 1..* letter,
              \. (single_quoted_string
                    rc_: (a fc_string_type_(.{Grammar},"'","'")
                           r_part: 1..* f_character_with_escape_for("'") ) )
                 (double_quoted_string
                    rc_: (a fc_string_type_(.{Grammar},'"','"')
                           r_part: 1..* f_character_with_escape_for('"') ) )
            )
            (character_or-set  r_part: 1..* character_range,
               rc_: a fc_string_type_(.{Grammar},"[","]") )
            (character_exclusion  r_part: 1..* character_range,
               rc_: a fc_string_type_(.{Grammar},"[^","]") )
            (letter_or_letter-code
               \. (letter  \. letter_range_1  letter_range_2)
                  (letter_code
                     \. (letter_code_with_hexadecimal  r_part: 1 hexadecimal,
                           \.  letter-code_range_1  letter-code_range_2,
                           rc_: a fc_string_type_(.{Grammar},"#","") ) ) )
       )
       (character_range
          \. (letter_range  r_part: 1 letter_range_1  0..1 letter_range_2,
                rc_: a fc_unspaced-list_type_(.{Grammar},"","-","") )
             (letter-code_range  r_part: 1 letter-code_range_1  0..1 letter-code_range_2,
                rc_: a fc_unspaced-list_type_(.{Grammar},"","-","") ) )
       (space_or_fully-delimited-comment
          \. (space  rc_: a fc_character_type_(.{Grammar}," ")
                          a fc_character_type_(.{Grammar},"\t")
                          a fc_character_type_(.{Grammar},"\n")
                          a fc_character_type_(.{Grammar},"\r")
             )
             (fully-delimited-comment  r_part: 0..* character,
                rc_: a fc_string_type_(.{Grammar},"/*","*/") ) )
       (comment  r_part: 0..* character,
          \. fully-delimited-comment
             (line-comment  rc_: a fc_string_type_(.{Grammar},"//","")) )
       //(delimitor \. (begin_mark) (end_mark) )
       (character \. letter)
     };




2.  Bison grammar for a generic parser of KRLs

The following code is not written in FL: it is a GNU Bison grammar.
The high-level ideas behind this parser are given in this section of this article.


/* Declarations for the generic KRL parser.
   Terminals are declared first, then the operator classes. For the operator classes,
   Bison gives a higher precedence to the classes that are declared later.
   NOTE(review): TOKEN_OPERATOR is declared but no rule below references it. */
%token BOOLEAN_LIKE_VALUE  NUMBER  STRING  TIME  COORDINATE_LIKE_NUMBER
%token ID  NAME  VAR   TOKEN_OPERATOR
%token TOP_PHRASE_begin TOP_PHRASE_end
%token INDIVIDUAL_TERM_begin INDIVIDUAL_TERM_end  PHRASE_TERM_begin PHRASE_TERM_end
%token PREFIX_FCT_LIKE_TERM_ARGS_begin PREFIX_FCT_LIKE_TERM_ARGS_end
%token PREFIX_FCT_LIKE_TERM_ARG_begin  PREFIX_FCT_LIKE_TERM_ARG_end
%token PREFIX_LIST_LIKE_TERM_begin     PREFIX_LIST_LIKE_TERM_end
%token POSTFIX_FCT_LIKE_TERM_ARGS_begin  POSTFIX_FCT_LIKE_TERM_ARGS_end
%token POSTFIX_LIST_LIKE_TERM_ARGS_begin POSTFIX_LIST_LIKE_TERM_ARGS_end
%token KEYED_INFIX_TERM_begin KEYED_INFIX_TERM_end
%token COLLECTION_begin  COLLECTION_end  ARGS_sep
%token MARKUP_TYPE_begin MARKUP_TYPE_end  MARKUP_VALUE_begin MARKUP_ATTRS_end
/* Delimiters that the rules below use but that had no declaration (and have no rule):
   Bison rejects a symbol that is neither a declared token nor a defined non-terminal.
   NOTE(review): they are assumed to be terminals produced by the lexer -- confirm.
   ARG and ARG_c, also used below without being defined, are presumably non-terminals
   and are therefore not declared here. */
%token TOP_PHRASE_sep
%token ARG_begin  ARG_end
%token OPERATOR_begin  OPERATOR_end  OPERATOR_prefix
%token KEYED_OPERATOR_begin  _KEYED_OPERATOR_end
%nonassoc NON_ASSOC_OPERATOR_LEVEL_5
%nonassoc NON_ASSOC_OPERATOR_LEVEL_4
%nonassoc NON_ASSOC_OPERATOR_LEVEL_3  /* e.g., VAR__SET_TO      */
%nonassoc NON_ASSOC_OPERATOR_LEVEL_2
%nonassoc NON_ASSOC_OPERATOR_LEVEL_1
%right RIGHT_ASSOC_OPERATOR_LEVEL_5
%right RIGHT_ASSOC_OPERATOR_LEVEL_4
%right RIGHT_ASSOC_OPERATOR_LEVEL_3  THEN  ELSE
%right RIGHT_ASSOC_OPERATOR_LEVEL_2
%right RIGHT_ASSOC_OPERATOR_LEVEL_1
%left  LEFT_ASSOC_OPERATOR_LEVEL_9
%left  LEFT_ASSOC_OPERATOR_LEVEL_8   /* e.g., "<=>" "<=" "=>"  */
%left  LEFT_ASSOC_OPERATOR_LEVEL_7   /* e.g., "or"             */
%left  LEFT_ASSOC_OPERATOR_LEVEL_6   /* e.g., "and"            */
%left  LEFT_ASSOC_OPERATOR_LEVEL_5   /* e.g., '<' '>' '=' "=<" ">=" "!="  */
%left  LEFT_ASSOC_OPERATOR_LEVEL_4   /* e.g., '+' '-'          */
%left  LEFT_ASSOC_OPERATOR_LEVEL_3   /* e.g., '*' '/' '%'      */
%left  LEFT_ASSOC_OPERATOR_LEVEL_2   /* e.g., '!' NOT          */
%left  LEFT_ASSOC_OPERATOR_LEVEL_1
%left  UMINUS
%expect 0
%start _TOP_PHRASES
%%
/* Start symbol: the input is empty or is a sequence of top-level phrases, optionally
   followed by a final separator.
   Fix: this rule used to reference TOP_PHRASE, which left TOP_PHRASES unreachable and
   limited the input to a single phrase. */
_TOP_PHRASES  : /* empty */
              | TOP_PHRASES
              | TOP_PHRASES  TOP_PHRASE_sep
              ;

/* Sequence of top-level phrases. A phrase that has its own delimiters (TOP_PHRASE_bce)
   may directly follow the previous one; a TERM_not1 must be preceded by a separator.
   Fix: the last alternative was followed by two ";". */
TOP_PHRASES   : TOP_PHRASE
              | TOP_PHRASES                   TOP_PHRASE_bce
              | TOP_PHRASES   TOP_PHRASE_sep  TOP_PHRASE_bce
              | TOP_PHRASES   TOP_PHRASE_sep  TERM_not1
              ;

TOP_PHRASE    : TOP_PHRASE_bce  | TERM ;
TOP_PHRASE_bce: TOP_PHRASE_begin  TERM  TOP_PHRASE_end | TERM_bce ;


/* A term is either a TERM_1 (a delimited term or a single token) or a TERM_not1
   (a term built from an operator, a function-like argument list or a separator). */
TERM
    : TERM_1
    | TERM_not1
    ;

TERM_1
    : TERM_bce
    | TOKEN
    ;

TERM_not1
    : OPERATOR_bceOrToken  ARGS_c  %prec UMINUS    /* operator first         */
    | TERM_1  PREFIX_FCT_LIKE_TERM_ARGS_bce        /* term then its args     */
    | TERM_1  OPERATOR_bceOrToken  ARGS            /* operator after a term  */
    | TERM_1  ARGS_sep  ARGS                       /* separator after a term */
    ;

/* A term that carries its own begin and end delimiters; each alternative is one of the
   delimited forms defined by the "_bce" rules of this grammar. */
TERM_bce : INDIVIDUAL_TERM_begin  TERM  INDIVIDUAL_TERM_end      /* (...) */
         | PHRASE_TERM_begin      TERM  PHRASE_TERM_end          /* [...] */
         | PREFIX_LIST_LIKE_TERM_bce  | POSTFIX_FCT_LIKE_TERM_bce/* (_. ..) | (_  ...)  */
         | POSTFIX_LIST_LIKE_TERM_bce | KEYED_INFIX_TERM_bce     /* (._ ..) | (@..@op..)*/
         | COLLECTION_bce                                        /* .(...)  .{...}      */
         | MARKUP_bce ;   /* NOTE(review): the example in this comment was lost (only
                             "parts" remained); presumably: opening tag, parts, closing tag */

/* Argument list that follows a TERM_1 used like a function: several arguments between
   the ARGS delimiters, or one argument after the ARG begin delimiter (the matching end
   delimiter is optional). */
PREFIX_FCT_LIKE_TERM_ARGS_bce
    : PREFIX_FCT_LIKE_TERM_ARGS_begin  ARGS_Nsup1  PREFIX_FCT_LIKE_TERM_ARGS_end
    | PREFIX_FCT_LIKE_TERM_ARG_begin   ARG_TERM_1  PREFIX_FCT_LIKE_TERM_ARG_end
    | PREFIX_FCT_LIKE_TERM_ARG_begin   ARG_TERM_1
    ;

/* (_. ...) : the operator comes first, inside the delimiters. */
PREFIX_LIST_LIKE_TERM_bce
    : PREFIX_LIST_LIKE_TERM_begin  TERM_1_AS_OPERATOR  ARGS  PREFIX_LIST_LIKE_TERM_end
    ;

/* (_  ...) : the operator comes after the closing delimiter. */
POSTFIX_FCT_LIKE_TERM_bce
    : POSTFIX_FCT_LIKE_TERM_ARGS_begin  ARGS  POSTFIX_FCT_LIKE_TERM_ARGS_end
      TERM_1_AS_OPERATOR
    ;

/* (._ ...) : the operator comes last, inside the delimiters. */
POSTFIX_LIST_LIKE_TERM_bce
    : POSTFIX_LIST_LIKE_TERM_ARGS_begin  ARGS  TERM_1_AS_OPERATOR
      POSTFIX_LIST_LIKE_TERM_ARGS_end
    ;

/* Keyed operator between two argument lists, inside the delimiters. */
KEYED_INFIX_TERM_bce
    : KEYED_INFIX_TERM_begin  ARGS  KEYED_OPERATOR  ARGS  KEYED_INFIX_TERM_end
    ;

/* .(...)  .{...} */
COLLECTION_bce
    : COLLECTION_begin  ARGS  COLLECTION_end
    ;
/* A markup element: type delimiter, attributes, end of the attributes, nested markup
   parts, then the closing type delimiter.
   Fixes:
   - MARKUP_ATTRS and MARKUP_PARTS were left-recursive without any base case, hence they
     (and MARKUP_bce) could not derive anything; an empty alternative is now their base case;
   - MARKUP_ATTRS and the declared token MARKUP_ATTRS_end were not used by any rule.
   NOTE(review): the original comments of MARKUP_bce only contained empty quotes (their
     examples and probably the attribute part of the rule were lost); the position of
     MARKUP_ATTRS  MARKUP_ATTRS_end below is a reconstruction -- confirm. */
MARKUP_bce    : MARKUP_TYPE_begin  MARKUP_ATTRS  MARKUP_ATTRS_end
                MARKUP_PARTS  MARKUP_TYPE_end ;
  MARKUP_ATTRS: /* empty */ | MARKUP_ATTRS  MARKUP_ATTR ;
   MARKUP_ATTR: ID  MARKUP_VALUE_begin  CONSTANT ;
  MARKUP_PARTS: /* empty */ | MARKUP_PARTS  MARKUP_bce ;

/* A keyed operator is a key (a reference) followed by a TERM_1 used as operator;
   it may be enclosed in its own begin/end delimiters. */
KEYED_OPERATOR
    : KEYED_OPERATOR_be
    | KEYED_OPERATOR_c
    ;

KEYED_OPERATOR_be
    : KEYED_OPERATOR_begin  KEYED_OPERATOR_c  _KEYED_OPERATOR_end
    ;

KEYED_OPERATOR_c
    : KEY  TERM_1_AS_OPERATOR
    ;

KEY
    : REFERENCE
    ;

/* Argument lists. ARGS_cs lists are separated by ARGS_sep; ARGS_c lists are sequences of
   delimited arguments (ARG_bce) without separator. ARGS_Nsup1 starts with an ARG_TERM_1
   that is followed by at least one other argument.
   Fix: the ARG_1c rule lacked its terminating ";", unlike every other rule of this grammar.
   NOTE(review): ARG (used by ARGS_cs) and ARG_c (used by ARG_bce) are not defined
     anywhere in this grammar -- their rules must be added or the names corrected. */
ARGS_Nsup1 : ARG_TERM_1 ARGS_sep  ARGS_cs | ARG_TERM_1 ARGS_c ;
ARGS       : ARGS_cs | ARGS_c ;
ARGS_cs    : ARG     | ARGS_cs  ARGS_sep  ARG_TERM_1 ;
ARGS_c     : ARG_bce | ARGS_c             ARG_bce ;

ARG_TERM_1: ARG_bce | ARG_1c ; /*fct def/call such as isolated or not key + param name */
ARG_bce   : ARG_begin ARG_c ARG_end;
ARG_1c    : TERM_1 ;
TERM_1_AS_OPERATOR : TERM_1 ;

/* An operator is either delimited (or introduced by OPERATOR_prefix) or is one of the
   operator tokens for which a precedence and an associativity are declared. */
OPERATOR_bceOrToken
    : OPERATOR_bce
    | OPERATOR_TOKEN
    ;

OPERATOR_bce
    : OPERATOR_begin  TERM  OPERATOR_end
    | OPERATOR_prefix  TERM_1
    ;

OPERATOR_TOKEN
    /* non-associative operator classes */
    : NON_ASSOC_OPERATOR_LEVEL_5
    | NON_ASSOC_OPERATOR_LEVEL_4
    | NON_ASSOC_OPERATOR_LEVEL_3
    | NON_ASSOC_OPERATOR_LEVEL_2
    | NON_ASSOC_OPERATOR_LEVEL_1
    /* right-associative operator classes */
    | RIGHT_ASSOC_OPERATOR_LEVEL_5
    | RIGHT_ASSOC_OPERATOR_LEVEL_4
    | RIGHT_ASSOC_OPERATOR_LEVEL_3
    | RIGHT_ASSOC_OPERATOR_LEVEL_2
    | RIGHT_ASSOC_OPERATOR_LEVEL_1
    /* left-associative operator classes */
    | LEFT_ASSOC_OPERATOR_LEVEL_9
    | LEFT_ASSOC_OPERATOR_LEVEL_8
    | LEFT_ASSOC_OPERATOR_LEVEL_7
    | LEFT_ASSOC_OPERATOR_LEVEL_6
    | LEFT_ASSOC_OPERATOR_LEVEL_5
    | LEFT_ASSOC_OPERATOR_LEVEL_4
    | LEFT_ASSOC_OPERATOR_LEVEL_3
    | LEFT_ASSOC_OPERATOR_LEVEL_2
    | LEFT_ASSOC_OPERATOR_LEVEL_1
    ;

/* Single-token terms: an operator token, a reference (identifier, name or variable)
   or a constant. */
TOKEN
    : OPERATOR_TOKEN
    | TOKEN_NOT_OPERATOR
    ;

TOKEN_NOT_OPERATOR
    : REFERENCE
    | CONSTANT          /*CONCRETE_VALUE*/
    ;

REFERENCE
    : ID
    | NAME
    | VAR
    ;

CONSTANT
    : BOOLEAN_LIKE_VALUE
    | NUMBER
    | STRING
    | TIME
    | COORDINATE_LIKE_NUMBER
    ;

%%