;*=====================================================================*/
;*    serrano/prgm/project/scribe/scribeinfo/lexer.scm                 */
;*    -------------------------------------------------------------    */
;*    Author      :  Manuel Serrano                                    */
;*    Creation    :  Thu Oct 11 09:24:03 2001                          */
;*    Last change :  Wed Jan  9 16:11:56 2002 (serrano)                */
;*    Copyright   :  2001-02 Manuel Serrano                            */
;*    -------------------------------------------------------------    */
;*    The lexer for texinfo format.                                    */
;*=====================================================================*/

;*---------------------------------------------------------------------*/
;*    The module                                                       */
;*---------------------------------------------------------------------*/
(module __scribeinfo_lexer
   (import __scribeinfo_parser
	   __scribeinfo_param)
   (export lexer
	   (string-from string mark delta)
	   (string-from-at string start mark delta)
	   (string-until string mark delta)
	   (string-until-at string start mark delta)
	   (string-char-after string char)
	   (string-prefix? left::bstring right::bstring)
	   (nthword str nth . delims)
	   (nthword2 str nth . delims)
	   (string-strip str char)
	   (string-skip str . delims)
	   (string-replace str1 c1 c2)))

;*---------------------------------------------------------------------*/
;*    *reserved-keywords* ...                                          */
;*---------------------------------------------------------------------*/
(define *reserved-keywords*
   '("AA" "aa" "AE" "ae"))

(define *argument-commands*
   '("dots"
     "equiv"
     "error"
     "TeX"
     "bullet"
     "email"
     "url"
     "b"
     "strong"
     "i"
     "sc"
     "w"
     "r"
     "t"
     "value"
     "copyright"
     "code"
     "var"
     "kbd"
     "key"
     "emph"
     "dfn"
     "samp"
     "ref"
     "xref"
     "pxref"
     "inforef"
     "expansion"
     "result"
     "equiv"
     "print"
     "file"
     "footnote"
     "math"))

(define *dual-argument-commands*
   '("title"
     "subtitle"
     "author"))

(define *line-commands*
   '("center"
     "cindex" "vindex" "pindex" "printindex"
     "node"
     "setfilename" "settitle"
     "syncodeindex" "synindex" "footnotestyle"
     "paragraphindent"
     "finalout"
     "table" "table @code" "table @bullet" "table @emph" "table @asis"
     "table @t" "end table" "table @r" "table @minus"
     "itemize" "itemize @bullet" "itemize @minus"
     "end itemize"
     "enumerate" "end enumerate"
     "item" "itemx"
     "menu" "end menu"
     "finalout"
     "paragraphindent"
     "set"
     "include"
     "dircategory"
     "direntry" "end direntry"
     "setchapternewpage"
     "shorttitlepage"
     "titlepage" "end titlepage"
     "title"
     "subtitle"
     "author"
     "page"
     "vskip"
     "macro" "rmacro" "end macro"
     "defn" "end defn"
     "deffn" "end deffn"
     "deffnx"
     "deftp" "end deftp"
     "defmethod" "end defmethod"
     "smalllisp" "end smalllisp"
     "lisp" "end lisp"
     "smallexample" "end smallexample"
     "example" "end example"
     "format" "end format"
     "display" "end display"
     "quotation" "end quotation"
     "chapter" "section" "subsection" "subsubsection"
     "unnumbered" "unnumberedsec" "unnumberedsubsubsec"
     "contents" "summarycontents"
     "flushleft" "end flushleft"
     "flushright" "end flushright"
     "bye"
     "refill"
     "noindent"
     "sp"))

(define *ignore-start-commands*
   '("ignore"
     "ifinfo"
     "iftex"
     "ifhtml"))

(define *ignore-stop-commands*
   '("end ignore"
     "end ifinfo"
     "end iftex"
     "end ifhtml"))
     
;*---------------------------------------------------------------------*/
;*    Mark                                                             */
;*---------------------------------------------------------------------*/
(define *argument-mark* (cons 1 1))
(define *line-mark* (cons 1 2))
(define *ignore-start-mark* (cons 1 3))
(define *ignore-stop-mark* (cons 1 4))

;*---------------------------------------------------------------------*/
;*    Initialize the keywords                                          */
;*---------------------------------------------------------------------*/
(for-each (lambda (k)
	     (let ((sym (string->symbol (string-append "@" k "{")))
		   (val (string->symbol (string-upcase k))))
		(putprop! sym 'info *argument-mark*)
		(putprop! sym 'info-value val)))
	  *argument-commands*)
;; we have to take special care to title because it is a polymorphic
;; command. It may be used with arguments in brackets or with arguments
;; found on the command line!
(for-each (lambda (k)
	     (let ((sym (string->symbol (string-append "@" k "{")))
		   (val (string->symbol (string-append "A" (string-upcase k)))))
		(putprop! sym 'info *argument-mark*)
		(putprop! sym 'info-value val)))
	  *dual-argument-commands*)
   
(for-each (lambda (k)
	     (let ((sym (string->symbol (string-append "@" k)))
		   (val (string->symbol (strip@ (string-upcase k)))))
		(putprop! sym 'info *line-mark*)
		(putprop! sym 'info-value val)))
	  *line-commands*)
(for-each (lambda (k)
	     (let ((sym (string->symbol (string-append "@" k))))
		(putprop! sym 'info *ignore-start-mark*)))
	  *ignore-start-commands*)
(for-each (lambda (k)
	     (let ((sym (string->symbol (string-append "@" k))))
		(putprop! sym 'info *ignore-stop-mark*)))
	  *ignore-stop-commands*)

;*---------------------------------------------------------------------*/
;*    lexer ...                                                        */
;*---------------------------------------------------------------------*/
(define lexer
   (regular-grammar ()

      ;; texinfo identification
      ((bol (: "\\input" (* all)))
       (ignore))
      
      ;; comment
      ((: "@" (or "c\n" (: (or "c " "comment ") (* all) (? #\Newline))))
       (ignore))

      ;; brackets
      (#\{
       (list 'BRAOPEN (the-coord (the-port))))
      (#\}
       (list 'BRACLO (the-coord (the-port))))
      
      ;; special commands
      ("@ "
       (list 'STRING (the-coord (the-port)) " "))
      ("@!"
       (list 'EXCLAMATION (the-coord (the-port))))
      ("@\""
       (list 'UMLAUT (the-coord (the-port))))
      ("@'"
       (list 'ACUTE (the-coord (the-port))))
      ("@*"
       (list 'LINEBREAK (the-coord (the-port))))
      ("@,{C}"
       (list 'CEDILLA (the-coord (the-port))))
      ("@-"
       (ignore))
      ("@."
       (list 'PERIODE (the-coord (the-port))))
      ("@:"
       (list 'STRING (the-coord (the-port)) " "))
      ("@="
       (list 'MACRON (the-coord (the-port))))
      ("@?"
       (list 'QUESTIONMARK (the-coord (the-port))))
      ("@@"
       (list 'STRING (the-coord (the-port)) "@"))
      ("@^"
       (list 'CIRCUMFLEX (the-coord (the-port))))
      ("@`"
       (list 'GRAVE (the-coord (the-port))))
      ("@{"
       (list 'STRING (the-coord (the-port)) "{"))
      ("@}"
       (list 'STRING (the-coord (the-port)) "}"))
      ("@~"
       (list 'TILDE (the-coord (the-port))))
      ("@L{}"
       (list '/L (the-coord (the-port))))
      ("@l{}"
       (list '/l (the-coord (the-port))))
      ("@O{}"
       (list '/O (the-coord (the-port))))
      ("@o{}"
       (list '/o (the-coord (the-port))))
      ("@OE{}"
       (list '@OE (the-coord (the-port))))
      ("@oe{}"
       (list '@oe (the-coord (the-port))))
      
      ;; commands with arguments
      ((: #\@ (+ (in ("azAZ"))) #\{)
       (let ((sym (the-symbol)))
	  (cond
	     ((not (eq? (getprop sym 'info) *argument-mark*))
	      (error/location "lexer(info)"
			      "Unknown argument command"
			      (the-string)
			      (input-port-name (the-port))
			      (input-port-position (the-port))))
	     (else
	      (list (getprop sym 'info-value) (the-coord (the-port)))))))

      ;; commands without arguments
      ((: #\@ (+ (in ("azAZ"))) #\Newline)
       (let ((sym (string->symbol (the-substring 0 (-fx (the-length) 1)))))
	  (cond
	     ((eq? (getprop sym 'info) *ignore-start-mark*)
	      (if (memq sym *if-info*)
		  (ignore)
		  (let* ((what (the-substring 1 (-fx (the-length) 1)))
			 (stop (string-append "@end " what)))
		     (let loop ((line (read-line (the-port))))
			(cond
			   ((eof-object? line)
			    (error "lexer(info)"
				   (string-append "Unclosed `" stop "'")
				   line))
			   ((string=? line stop)
			    (ignore))
			   (else
			    (loop (read-line (the-port)))))))))
	     ((not (eq? (getprop sym 'info) *line-mark*))
	      (error/location "lexer(info)"
			      "Unknown plain command"
			      (the-string)
			      (input-port-name (the-port))
			      (input-port-position (the-port))))
	     (else
	      (list (getprop sym 'info-value)
		    (the-coord (the-port))
		    (the-substring 0 (-fx (the-length) 1)))))))

      ;; commands without arguments at the end of input
      ((: #\@ (+ (in ("azAZ"))))
       (let ((sym (the-symbol)))
	  (cond
	     ((not (eq? (getprop sym 'info) *line-mark*))
	      (error/location "lexer(info)"
			      "Unknown plain command"
			      (the-string)
			      (input-port-name (the-port))
			      (input-port-position (the-port))))
	     (else
	      (list (getprop sym 'info-value)
		    (the-coord (the-port))
		    (the-string))))))

      ;; commands without arguments but with a text
      ((: #\@ (+ (in ("azAZ"))) (in " \t") (* all) #\Newline)
       (let ((sym (string->symbol (the-substring 0 (-fx (the-length) 1)))))
	  (cond
	     ((eq? (getprop sym 'info) *ignore-stop-mark*)
	      (ignore))
	     ((eq? (getprop sym 'info) *line-mark*)
	      (list (getprop sym 'info-value)))
	     (else
	      (let* ((str (the-string))
		     (word (string-until str #\space 0))
		     (k (string->symbol word)))
		 (cond
		    ((eq? (getprop k 'info) *ignore-start-mark*)
		     (if (memq k *if-info*)
			 (ignore)
			 (let* ((what (substring word 1 (string-length word)))
				(stop (string-append "@end " what))
				(lstop (string-length stop)))
			    (let loop ((line (read-line (the-port))))
			       (cond
				  ((eof-object? line)
				   (error "lexer(info)"
					  (string-append "Unclosed `" stop "'")
					  line))
				  ((substring=? line stop lstop)
				   (ignore))
				  (else
				   (loop (read-line (the-port)))))))))
		    ((eq? (getprop k 'info) *ignore-stop-mark*)
		     (ignore))
		    ((not (eq? (getprop k 'info) *line-mark*))
		     (error/location "lexer(info)"
				     "Unknown line command"
				     (the-string)
				     (input-port-name (the-port))
				     (input-port-position (the-port))))
		    (else
		     (let* ((rest (the-substring (+fx (string-length word) 1)
						 (-fx (the-length) 1)))
			    (coord (cons (input-port-name (the-port))
					 (-fx (input-port-position (the-port))
					      (+fx 1 (string-length rest))))))
			;; we have to parse the rest of the line with the
			;; regular texinfo parser...
			(with-input-from-string rest
			   (lambda ()
			      (list (getprop k 'info-value)
				    coord
				    (parse-string rest coord))))))))))))

      ;; text
      ((+ (out "@{}\n"))
       (list 'STRING (the-coord (the-port)) (the-string)))

      ;; Newlines
      ((+ #\Newline)
       (if (=fx (the-length) 1)
	   (list 'STRING (the-coord (the-port)) "\n")
	   (list 'LINEBREAK (the-coord (the-port)) (the-length))))
      
      (else
       (let ((c (the-failure)))
	  (if (eof-object? c)
	      c
	      (error/location "lexer(info)"
			      "Illegal character"
			      c
			      (input-port-name (the-port))
			      (input-port-position (the-port))))))))

;*---------------------------------------------------------------------*/
;*    string-from ...                                                  */
;*    -------------------------------------------------------------    */
;*    Skip all characters until MARK, returns the rest of the          */
;*    string. Delta is how many characters to skip from the marker.    */
;*---------------------------------------------------------------------*/
(define (string-from string mark delta)
   (let ((len (string-length string)))
      (let loop ((i 0))
	 (cond
	    ((=fx i len)
	     "")
	    ((char=? (string-ref string i) mark)
	     (substring string (+fx i delta) len))
	    (else
	     (loop (+fx i 1)))))))

;*---------------------------------------------------------------------*/
;*    string-from-at ...                                               */
;*    -------------------------------------------------------------    */
;*    Skip all characters until MARK, returns the rest of the          */
;*    string. Delta is how many characters to skip from the marker.    */
;*---------------------------------------------------------------------*/
(define (string-from-at string start mark delta)
   (let ((len (string-length string)))
      (let loop ((i start))
	 (cond
	    ((=fx i len)
	     "")
	    ((char=? (string-ref string i) mark)
	     (substring string (+fx i delta) len))
	    (else
	     (loop (+fx i 1)))))))

;*---------------------------------------------------------------------*/
;*    string-until ...                                                 */
;*    -------------------------------------------------------------    */
;*    Skip all characters after MARK, returns the rest of the          */
;*    string. Delta is how many characters to skip from the marker.    */
;*---------------------------------------------------------------------*/
(define (string-until string mark delta)
   (string-until-at string 0 mark delta))

;*---------------------------------------------------------------------*/
;*    string-until-at ...                                              */
;*    -------------------------------------------------------------    */
;*    Skip all characters after MARK, returns the rest of the          */
;*    string. Delta is how many characters to skip from the marker.    */
;*---------------------------------------------------------------------*/
(define (string-until-at string start mark delta)
   (let ((len (string-length string)))
      (let loop ((i start))
	 (cond
	    ((>=fx i len)
	     "")
	    ((char=? (string-ref string i) mark)
	     (substring string start (+fx i delta)))
	    (else
	     (loop (+fx i 1)))))))

;*---------------------------------------------------------------------*/
;*    string-char-after ...                                            */
;*    -------------------------------------------------------------    */
;*    Return the char following the first occurrence of CHAR in        */
;*    STRING.                                                          */
;*---------------------------------------------------------------------*/
(define (string-char-after string char)
   (let ((len (-fx (string-length string) 1)))
      (let loop ((i 0))
	 (cond
	    ((>=fx i len)
	     (error "string-char-after:Can't find char" string char))
	    ((char=? (string-ref string i) char)
	     (string-ref string (+fx i 1)))
	    (else
	     (loop (+fx i 1)))))))

;*---------------------------------------------------------------------*/
;*    string-prefix? ...                                               */
;*    -------------------------------------------------------------    */
;*    Is LEFT a prefix of RIGHT?                                       */
;*---------------------------------------------------------------------*/
(define (string-prefix? left::bstring right::bstring)
   (substring=? left right (string-length left)))

;*---------------------------------------------------------------------*/
;*    nthword ...                                                      */
;*    -------------------------------------------------------------    */
;*    Returns the NTH word of the STR.                                 */
;*---------------------------------------------------------------------*/
(define (nthword str nth . delims)
   (let ((delims (if (null? delims)
		     '(#\tab #\Newline #\space)
		     delims))
	 (len (string-length str)))
      (define (find-end-word start)
	 (cond
	    ((=fx start len)
	     len)
	    ((memq (string-ref str start) delims)
	     start)
	    (else
	     (find-end-word (+fx 1 start)))))
      (define (skip-blank start delims)
	 (cond
	    ((=fx start len)
	     #f)
	    ((memq (string-ref str start) delims)
	     (skip-blank (+fx 1 start) delims))
	    (else
	     start)))
      (let loop ((num 0)
		 (start (skip-blank 0 '(#\tab #\Newline #\space))))
	 (cond
	    ((not start)
	     #f)
	    ((=fx num nth)
	     (substring str start (find-end-word start)))
	    (else
	     (loop (+fx num 1) (skip-blank (find-end-word start) delims)))))))

;*---------------------------------------------------------------------*/
;*    nthword2 ...                                                     */
;*    -------------------------------------------------------------    */
;*    Returns the NTH word of the STR.                                 */
;*    -------------------------------------------------------------    */
;*    nthword2 consider that there is only one separator between       */
;*    words.                                                           */
;*---------------------------------------------------------------------*/
(define (nthword2 str nth . delims)
   (let ((delims (if (null? delims)
		     '(#\tab #\Newline #\space)
		     delims))
	 (len (string-length str)))
      (define (find-end-word start)
	 (cond
	    ((=fx start len)
	     len)
	    ((memq (string-ref str start) delims)
	     start)
	    (else
	     (find-end-word (+fx 1 start)))))
      (define (skip-blank start delims)
	 (cond
	    ((=fx start len)
	     #f)
	    ((memq (string-ref str start) delims)
	     (+fx 1 start))
	    (else
	     start)))
      (let loop ((num 0)
		 (start (skip-blank 0 '(#\tab #\Newline #\space))))
	 (cond
	    ((not start)
	     #f)
	    ((=fx num nth)
	     (substring str start (find-end-word start)))
	    (else
	     (loop (+fx num 1) (skip-blank (find-end-word start) delims)))))))

;*---------------------------------------------------------------------*/
;*    the-coord ...                                                    */
;*---------------------------------------------------------------------*/
(define (the-coord port)
   (cons (input-port-name port) (input-port-position port)))

;*---------------------------------------------------------------------*/
;*    string-skip ...                                                  */
;*    -------------------------------------------------------------    */
;*    Skip all chars in CHARS.                                         */
;*---------------------------------------------------------------------*/
(define (string-skip str . delims)
   (let ((len (string-length str)))
      (let loop ((i 0))
	 (cond
	    ((=fx i len)
	     "")
	    ((memq (string-ref str i) delims)
	     (loop (+fx i 1)))
	    (else
	     (substring str i len))))))
   
;*---------------------------------------------------------------------*/
;*    string-strip ...                                                 */
;*    -------------------------------------------------------------    */
;*    Remove all occurences of CHAR in STR.                            */
;*---------------------------------------------------------------------*/
(define (string-strip str char)
   (let ((len (string-length str)))
      (let loop ((i 0)
		 (pos 0))
	 (cond
	    ((=fx i len)
	     (if (null? pos)
		 str
		 (let* ((nlen (-fx len pos))
			(res (make-string nlen)))
		    (let loop ((r 0)
			       (w 0))
		       (cond
			  ((=fx r len)
			   res)
			  ((char=? (string-ref str r) char)
			   (loop (+fx r 1) w))
			  (else
			   (string-set! res w (string-ref str r))
			   (loop (+fx r 1) (+fx w 1))))))))
	    ((char=? (string-ref str i) char)
	     (loop (+fx i 1) (+fx pos 1)))
	    (else
	     (loop (+fx i 1) pos))))))
		 
;*---------------------------------------------------------------------*/
;*    strip@ ...                                                       */
;*---------------------------------------------------------------------*/
(define (strip@ str)
   (string-strip str #\@))

;*---------------------------------------------------------------------*/
;*    string-replace ...                                               */
;*---------------------------------------------------------------------*/
(define (string-replace str1 c1 c2)
   (let* ((len (string-length str1))
	  (str2 (make-string len)))
      (let loop ((r 0))
	 (if (=fx r len)
	     str2
	     (let ((c (string-ref str1 r)))
		(if (char=? c c1)
		    (string-set! str2 r c2)
		    (string-set! str2 r c))
		(loop (+fx r 1)))))))
		 
	    
