(* Copyright (C) 1992, Digital Equipment Corporation                         *)
(* All rights reserved.                                                      *)
(* See the file COPYRIGHT for a full description.                            *)

(* Created by J.Stolfi on Nov 1990.                            *)
(* Last modified on Sun Mar  1 14:21:51 PST 1992 by meehan     *)
(*      modified on Wed Feb 12 12:38:09 PST 1992 by muller     *)
(*      modified on Wed Nov 20 17:53:46 PST 1991 by stolfi     *)

INTERFACE SxSyntax;

(*
  A syntax table for reading and printing symbolic expressions.

  An SxSyntax.T is an object that describes a particular syntax
  for symbolic expressions.  Although very general in principle, the
  internal representation and the tools in this interface are
  biased towards the "official" symbolic expression syntax, plus a 
  limited range of "canonical" extensions and variants of it.
  
  Specifically, the client can easily modify the standard syntax 
  in the following ways:

    * specify filters that are automatically called by Read 
      and Print to transform specific REF types or lists beginning with 
      specific symbols;
      
    * provide custom routines for converting symbol names and numeric
      literals into arbitrary values

    * disable standard token types (text, character, etc.)

    * declare that certain characters should be treated as whitespace.

  With a little more effort, the client also can:

    * add new cases to the '#' notation, e.g. #'0000101000',
      and define other split-syntax characters besides '#'.

    * add new token types distinguished by their leading character,
      e.g. `foo  or  $error

  Index: symbolic expressions; expressions, symbolic

  *)

IMPORT List, Rd, Sx, SxSymbol, FWr, Wr, Thread;

TYPE
  CHARS = ARRAY OF CHAR;
    (*
      An open array of characters.  The /apply/ methods of
      SymbolConverter, IntConverter, FloatConverter and
      OtherNumConverter take a VAR parameter of this type. *)

TYPE
  T = Sx.Syntax;
    (*
      A syntax table.  The methods of Sx.Syntax are given by the
      REVEAL declaration that follows. *)
  Private <: ROOT;
    (*
      The supertype of Sx.Syntax.  This interface says only that it
      is a subtype of ROOT. *)

REVEAL
  Sx.Syntax = Private BRANDED OBJECT METHODS
      (*
        Partial revelation of Sx.Syntax: a branded subtype of Private
        offering the methods Copy, Read, ReadUntil and Print. *)

      Copy(): T;
        (* 
          Returns a copy of the syntax table. *)

      Read(rd: Rd.T; root: SxSymbol.T): REFANY
        RAISES {Sx.ReadError, Rd.EndOfFile, Rd.Failure, Thread.Alerted};
        (*
          Reads one symbolic expression from the reader, leaving the
          reader positioned at the following character.  Symbols are
          interned relative to "root".
          
          Here is how Read works:  An SxSyntax.T object contains a
          /character dispatch table/ that maps characters to Parser
          routines.  Read takes the first character of the input stream,
          gets the corresponding parser from this table, and invokes
          it with the same arguments that were passed to Read.

          The output of the parser (a REFANY value) is then
          a candidate for /input filtering/.  The SxSyntax.T object
          contains a pair of tables of /input filter/ routines, one keyed
          by SxSymbol.T and the other by typecode.  If the output of
          the Parser is a List.T beginning with one of the symbols in
          the first table, or is a REF value whose typecode is in the
          second table, Read passes it through the corresponding
          filtering routine.

          In any case, if the resulting REF is the special value
          /NoValue/, Read discards it and repeats the whole process
          again where the Parser left off.  Otherwise, Read returns
          that value as result.

          In the case of tokens characterized by a two-character prefix,
          such as '#|foo|#' in the standard syntax, the leading
          character ('#') is assigned a Parser that reads the next
          character, and picks the appropriate SubParser from a
          second-level character dispatch table.
          
          Read raises Rd.EndOfFile if it reaches the end of the reader
          while skipping any leading whitespace.  It raises Sx.ReadError
          if the input expression is malformed or incomplete.
          *)

      ReadUntil(rd: Rd.T; delim: CHAR; root: SxSymbol.T): List.T
        RAISES {Sx.ReadError, Rd.Failure, Thread.Alerted};
        (*
          Repeatedly reads symbolic expressions from the reader, ignoring
          whitespace, until /delim/ is encountered, returning the expressions
          in a list.  Raises Sx.ReadError if any expression is malformed
          or incomplete, or if end-of-file occurs before /delim/ is found
          (note that Rd.EndOfFile is not in the RAISES set of this
          method). *)

      Print(
          fwr: FWr.T; 
          value: REFANY; 
          elision: Sx.Elision;
          root: SxSymbol.T;
        ) RAISES {Sx.PrintError, Wr.Failure, Thread.Alerted};
        (*
          Prints a symbolic expression to /fwr/, eliding the object at
          depths and lengths greater than those specified by "elision".
          Symbols in /value/ are printed relative to the given /root/
          (Sx.PrintError is raised if /value/ contains any symbol that is not
          a proper descendant of /root/).

          Here is how Print works:  First, the given value is a
          candidate for output filtering.  The SxSyntax.T object
          contains a pair of tables of /output filter/ routines, one
          keyed by SxSymbol.T and the other by typecode.  If the given
          value is a List.T beginning with one of the symbols in the
          first table, or is a REF value whose typecode is in the second
          table, Print passes it through the corresponding filtering
          routine.

          The SxSyntax.T object also contains two internal tables of
          specialized printing routines, one keyed by symbols, and the
          other by typecodes.  Print selects the appropriate routine
          by looking up the (possibly filtered) value in these two
          tables, in the same way it selected the output filter.  If
          this lookup fails, Print uses a default printing routine also
          stored in the SxSyntax.T object.
          
          In any case, Print calls the selected routine to pretty-print
          the given value to /fwr/, possibly calling Print recursively.
       *)
    END;

(**********************************************************)
(* THE STANDARD SYNTAX                                    *)
(**********************************************************)

PROCEDURE Standard(): T;
  (*
    Returns a new copy of the standard syntax table.  Each call yields
    a new copy, which can be customized with the Set... and Make...
    procedures of this interface. *)

(**********************************************************)
(* FILTERS                                                *)
(**********************************************************)

VAR (*CONST*) 
  NoValue: REFANY;
    (* 
      A special marker value.  Any of the input filters, parsers, and
      converters below may return NoValue to signify that the token it
      just parsed should be ignored by Read; Read then discards the
      result and continues reading where the parser left off.
      NOTE(review): declared VAR but tagged (*CONST*) -- presumably
      clients must never assign to it; confirm. *)

PROCEDURE SetInputListFilter(t: T; symbol: SxSymbol.T; filter: Filter);
  (*
    Registers /filter/ in the syntax table /t/ as a data transformation
    procedure to be called by Read whenever it parses a list starting
    with the given /symbol/. *)

PROCEDURE SetInputRefFilter(t: T; typeCode: INTEGER; filter: Filter);
  (*
    Registers /filter/ in the syntax table /t/ as a data transformation
    procedure to be called by Read whenever it parses a value whose
    TYPECODE is /typeCode/. *)

PROCEDURE SetOutputListFilter(t: T; symbol: SxSymbol.T; filter: Filter);
  (*
    Registers /filter/ in the syntax table /t/ as a data transformation
    procedure to be called by Print before printing a list starting
    with the given /symbol/. *)

PROCEDURE SetOutputRefFilter(t: T; typeCode: INTEGER; filter: Filter);
  (*
    Registers /filter/ in the syntax table /t/ as a data transformation
    procedure to be called by Print before printing an object whose
    TYPECODE is /typeCode/. *)

EXCEPTION FilterError(TEXT);
  (*
    May be raised by the /apply/ method of a Filter when the value it
    is given is malformed.  The exception carries a TEXT argument
    (presumably a description of the problem -- confirm). *)

TYPE
  Filter = OBJECT METHODS
      apply(
        value: REFANY;       (* The value to be converted *)
        root: SxSymbol.T;    (* The root symbol *)
        syntax: T;           (* The syntax table *)
      ): REFANY RAISES {FilterError};
      (*
        A Filter is an object whose /apply/ method converts a REFANY
        into a REFANY.  Filters are registered with the
        SetInput...Filter and SetOutput...Filter procedures above, and
        are then called by Read and Print respectively.

        /apply/ may raise FilterError when the /value/ is malformed. *)
    END;

(**********************************************************)
(* CHANGING THE SYMBOL/NUMBER SYNTAX                      *)
(**********************************************************)

PROCEDURE SetSymbolConverter(t: T; converter: SymbolConverter);
  (*
    Registers /converter/ in the syntax table /t/ as the procedure
    to be used to parse symbol-like tokens. *)

TYPE
  SymbolConverter = OBJECT METHODS
      apply(VAR name: CHARS; root, parent: SxSymbol.T; syntax: T): REFANY 
      RAISES {Sx.ReadError};
      (*
        A SymbolConverter is called to process symbol names.  Its
        /apply/ method is called once for each simple component /name/
        of the symbol, from left to right, after all escapes and quotes
        have been resolved.
        
        The parameter /parent/ is the result of parsing all simple
        components of the symbol that come before /name/.  /apply/
        should return the SxSymbol.T that corresponds to the notation
        /parent/./name/.  The parameter /root/ is the original symbol
        root passed to Read, and /syntax/ is the syntax table.

        /name/ is a VAR parameter: the SymbolConverter may modify its
        contents if it needs temporary storage. 
      *)
    END;

PROCEDURE SetIntConverter(t: T; converter: IntConverter);
  (*
    Registers /converter/ in the syntax table /t/ as the procedure
    for converting integer literals into REF values. *)

TYPE
  IntConverter = OBJECT METHODS
      apply(VAR chars: CHARS; syntax: T): REFANY RAISES {Sx.ReadError};
      (*
        An IntConverter is called after the numeric literal has been
        extracted from the input stream, and has been determined 
        to be an integer literal satisfying the syntax
        
          integer    => ["-"|"+"] digit+
                      | digit+ "_" ["-"|"+"] hexdigit+

          hexdigit   => digit 
                      | "a".."z"
                      | "A".."Z"

          digit      => "0".."9"

        In the case of literals with explicit base in the Modula-3 style
        ("8_007777"), the base is NOT guaranteed to be in [2..36], and
        the digits are NOT guaranteed to be in the range [0..base-1].
        The /apply/ routine should check these conditions, and raise
        Sx.ReadError if they do not hold.

        /chars/ is a VAR parameter: the IntConverter may modify its
        contents if it needs temporary storage. 
        *)
    END;

PROCEDURE SetFloatConverter(t: T; converter: FloatConverter);
  (*
    Registers /converter/ in the syntax table /t/ as the procedure
    for converting "float" literals into REF values. *)

TYPE
  FloatConverter = OBJECT METHODS
      apply(VAR chars: CHARS; syntax: T): REFANY RAISES {Sx.ReadError};
      (*
        A FloatConverter is called after the numeric literal has been
        extracted from the input stream, and has been determined 
        to be a floating-point literal satisfying the syntax
        
          float     => ["-"|"+"] digit* "." digit* [exponent]
                     | ["-"|"+"] digit+ exponent
 
          exponent  => ("E"|"e"|"d"|"D") [["-"|"+"] digit+]

        /chars/ is a VAR parameter: the FloatConverter may modify its
        contents if it needs temporary storage. 
        *)
    END;

PROCEDURE SetOtherNumConverter(t: T; converter: OtherNumConverter);
  (*
    Registers /converter/ in the syntax table /t/ as the procedure
    for converting number-like tokens that are neither integers nor
    floats into REF values.
    (The default is to treat those tokens as syntax errors.) *)

TYPE
  OtherNumConverter = OBJECT METHODS
      apply(VAR chars: CHARS; syntax: T): REFANY RAISES {Sx.ReadError};
      (*
        An OtherNumConverter is called to process tokens that look like
        numbers (in the sense that they begin with a digit, or a sign
        followed by a digit), but do not follow the syntax of integer
        and float literals, as described under IntConverter and
        FloatConverter.

        /chars/ is a VAR parameter: the OtherNumConverter may modify
        its contents if it needs temporary storage. 
        *)
    END;

(**********************************************************)
(* SPECIAL CHARACTERS                                     *)
(**********************************************************)

(*
  The procedures 
|
|     MakeCharSpace 
|     SetCharParser 
|     MakeCharIllegal 
|     SetTwoCharParser
|     MakeTwoCharIllegal 
|     MakeCharSymNum
|
  specify what to do with tokens that begin with a given
  character or two-character combination.
  
  Calling any of these procedures on a given character /ch/ undoes the effect
  of all previous calls of the other procedures for that character.  In
  particular, SetCharParser, SetTwoCharParser, MakeCharSpace,
  MakeCharIllegal, and MakeTwoCharIllegal prevent further use of 
  /ch/ in symbol and numeric literals (unless it is escaped).

  On the other hand, these procedures do not affect the meaning of
  the second character of "sharp" tokens, or the contents of text
  and character literals.  For example, MakeCharSpace(t, '_') changes
  the meaning of _foo but not those of #_foo or "_foo" or '_' *)

PROCEDURE SetCharParser(t: T; ch: CHAR; parser: Parser);
  (*
    Specifies the parsing procedure to use for all tokens beginning
    with the given character /ch/.  Can be used to define new token types
    (e.g., `foo or $error) or to change the syntax and semantics of
    existing types (text and character literals).
    
    If /parser=NIL/, /ch/ will not be allowed as the first
    character of any token, or as a constituent of symbols and numeric
    literals.  *)

PROCEDURE MakeCharIllegal(t: T; ch: CHAR);
  (*
    Equivalent to SetCharParser(t, ch, NIL): /ch/ will no longer be
    allowed as the first character of any token, or as a constituent
    of symbols and numeric literals.  *)

PROCEDURE MakeCharSpace(t: T; ch: CHAR);
  (*
    Specifies that the syntax table /t/ should treat /ch/ as a space
    (whitespace) character.  *)

TYPE
  Parser = OBJECT METHODS
      apply(
        rd: Rd.T;
        ch: CHAR;
        root: SxSymbol.T;
        syntax: T
      ): REFANY RAISES {Sx.ReadError, Rd.Failure, Thread.Alerted};
      (*
        A Parser is a routine called by Read to parse a symbolic
        (sub-)expression that begins with the specified character.
        
        On entry to /apply/, "rd" is positioned right after the
        character /ch/ that triggered it.  The /apply/ routine may
        consume zero or more additional characters from "rd" to compute
        its result, and it may recursively invoke Sx.Read.  If unexpected
        end-of-file or some other syntax error occurs during the
        parsing, /apply/ should raise Sx.ReadError.

        NOTE(review): unlike SubParser.apply, the RAISES set here does
        not include Rd.EndOfFile -- confirm that the difference is
        intentional.  *)
    END;

PROCEDURE SetTwoCharParser(t: T; ch, next: CHAR; parser: SubParser);
  (*
    Registers the procedure to be used for parsing tokens beginning
    with /ch/ and /next/. 
    
    If /parser=NIL/, the characters /ch/ and /next/ will not be allowed
    as the first two characters of any token.  
    
    In any case, /ch/ will not be allowed as a constituent of symbols
    and numeric literals.  Also cancels the effect of all previous calls
    to SetCharParser for the character /ch/.  *)

PROCEDURE MakeTwoCharIllegal(t: T; ch, next: CHAR);
  (*
    Equivalent to SetTwoCharParser(t, ch, next, NIL): the characters
    /ch/ and /next/ will no longer be allowed as the first two
    characters of any token.  *)

TYPE
  SubParser = OBJECT METHODS
      apply(
        rd: Rd.T;
        ch, next: CHAR;
        root: SxSymbol.T;
        syntax: T
      ): REFANY RAISES {Sx.ReadError, Rd.EndOfFile, Rd.Failure, Thread.Alerted};
      (*
        A SubParser is a routine called by Read to parse the body
        of tokens that begin with a specific two-character combination.
        
        On entry to the SubParser, "rd" is positioned right after the
        two-character combination that triggered it (/ch/ followed
        by /next/).  The routine may consume zero or more additional
        characters from "rd" to compute its result, and it may
        recursively invoke Sx.Read.  If an unexpected end-of-file or some
        other syntax error is encountered during this parsing, /apply/
        should raise Sx.ReadError.  *)
    END;
      
PROCEDURE MakeCharSymNum(t: T; ch: CHAR);
  (*
    Specifies that, in the syntax table /t/, /ch/ is allowed as part
    of symbol names or numeric literals (even as the leading
    character).  *)

PROCEDURE SetSymNumParser(t: T; parser: Parser);
  (*
    Registers /parser/ in the syntax table /t/ as the procedure to be
    used to read and parse the tokens that look like symbols or
    numbers.  This call permanently overrides the effect of
    SetSymbolConverter, SetIntConverter, and SetFloatConverter.
    NOTE(review): presumably SetOtherNumConverter is overridden as
    well -- confirm against the implementation. *)

(**********************************************************)
(* PRINTING SPECIAL VALUES                                *)
(**********************************************************)

PROCEDURE SetRefPrinter(t: T; typeCode: INTEGER; printer: Printer);
  (*
    Registers /printer/ in the syntax table /t/ as the printing
    routine for REF values whose TYPECODE is /typeCode/. *)

PROCEDURE SetListPrinter(t: T; symbol: SxSymbol.T; printer: Printer);
  (*
    Registers /printer/ in the syntax table /t/ as the printing
    routine for List.T values that begin with the given /symbol/. *)

PROCEDURE SetDefaultPrinter(t: T; printer: Printer);
  (*
    Registers /printer/ in the syntax table /t/ as the printing
    routine to be used for REF values that have no printer routine
    of their own. *)

TYPE
  Printer = OBJECT METHODS
      apply(
        fwr: FWr.T;
        value: REFANY;
        elision: Sx.Elision;
        root: SxSymbol.T;
        syntax: T;
      ) RAISES {Sx.PrintError, Wr.Failure, Thread.Alerted};
      (*
        A Printer is a procedure that is called by Print to output
        objects of a specific REF type, or lists that begin with a
        specific symbol.
        
        The /apply/ routine need not flush the writer, and should not add
        any whitespace before or after the printed value. *)
    END;

VAR (*CONST*)
  IllegalValuePrinter: Printer;
    (*
      A Printer that always raises Sx.PrintError.
      NOTE(review): declared VAR but tagged (*CONST*) -- presumably
      clients must never assign to it; confirm. *) 

(**********************************************************)
(* INDENTATION                                            *)
(**********************************************************)

PROCEDURE SetIndentation(t: T; indentation: CARDINAL);
  (*
    Specifies the extra indentation per level that the syntax table
    /t/ should use when printing multi-line expressions. *)

CONST StandardIndentation = 2;
    (* The indentation for the standard syntax (presumably in the
       same units as the /indentation/ parameter of SetIndentation
       -- confirm). *)

END SxSyntax.
