LaTeX Verbatim filter

By Martin Odersky

The following program formats Scala code embedded in LaTeX files. It translates all sections in a LaTeX source which are delimited by

\verb@...@ 

or by

\begin{verbatim} 
... 
\end{verbatim}.

The translation maps into environments and commands which are configurable by the user. It highlights keywords and also performs some character substitutions. More details are found in the comment below.

The program is a fairly straightforward adaptation of a Java program. It works only with simple data structures: files and arrays of bytes. Compared to the original program it is nevertheless much nicer, mainly because of the use of nested functions and tail-recursion.

import java.io._
 
/**  A text formatter for scala programs. To run it, use either 
 *   one of the following commands
 *
 *   java verbfilterScala <in.src> <in.tex>
 *   scala verbfilterScala <in.src> <in.tex>
 *  
 * This would translate the file `in.src' into `in.tex'. 
 * The program highlights reserved words and line comments.
 * It understands the following formatting commands:
 *  
 *   \=    tab set
 *   \>    advance to next set tab
 *   $     toggle math mode
 *   \R    red color, translates to \color{red}
 *   \B    blue color, translates to \color{blue}
 *   \S    black color, translates to \color{black}
 *   \G    green color, translates to \color{green}
 *   \\    a single backslash `\'
 *   \$    a dollar sign `$'
 *  
 *  The source file `in.src' needs to contain definitions of the following 
 *  LaTeX commands:
 *
 *    \vem            Highlight keyword
 *    \prog{#1}       argument is inline program code
 *    \linecomment    Highlight line comment
 *
 *  It also needs to define the following environment:
 *
 *    program         for multi-line program code.
 *   
 *  The program environment needs to refine LaTeX's tabbing environment, so that
 *  \= and \> are defined.
 *  
 *  Here are examples of these LaTeX definitions:
 *
 * \newcommand{\prog}[1]     {\darkbluetext{\sf #1}}
 * \newcommand{\vem}         {\color{violet}}
 * \newcommand{\linecomment} {\color{darkgreen}}
 * \newenvironment{program}  {\begin{quote} \small \darkblue \sf \begin{tabbing}}
 *                           {\end{tabbing} \end{quote}}
 * \definecolor{darkblue}{rgb}{0, 0, 0.4}
 * \definecolor{darkgreen}{rgb}{0, 0.3, 0.0}
 */
object verbfilterScala {

  /** Increment for tabulator characters: a tab in leading whitespace
   *  advances the column to the next multiple of this value.
   */
  final val TABINC = 8

  /** Output commands, to be defined as LaTeX commands in the source file.
   */
  final val progString = "\\prog"
  final val reservedString = "\\vem"
  final val lineCommentString = "\\linecomment"
  final val beginProgramString = "\\begin{program}"
  final val endProgramString = "\\end{program}"

  /** Strings significant for input.
   */
  final val beginVerbatim = "\\begin{verbatim}"
  final val endVerbatim = "\\end{verbatim}"
  final val verb = "\\verb"

  /** Reserved words, sorted alphabetically. `keyIndex` returns positions
   *  in this table.
   *  NOTE(review): `throw` is not listed here -- confirm whether that is
   *  intentional before extending the table.
   */
  val reserved: Array[String] = Array(
    "abstract", "case", "catch", "class", "def",
    "do", "else", "extends", "false", "final", "finally",
    "for", "if", "implicit", "import", "match", "mixin",
    "new", "null", "object", "override", "package",
    "private", "protected", "requires", "return", "sealed",
    "super", "this", "trait", "true", "try",
    "type", "val", "var", "while", "with",
    "yield")

  /** Line counters. `lineCount` is the number of the input line currently
   *  being processed; `verbLine` is the value `lineCount` had when the most
   *  recent `verb`/`verbatim` section was entered (reported by `main` when
   *  processing fails).
   */
  var lineCount = 0
  var verbLine = 0

  /** The main process method: copies the LaTeX text in `buf` to `out`,
   *  translating every `\verb` and `verbatim` section on the way.
   *
   *  @param buf  the complete input, one byte per character
   *  @param out  the stream receiving the translated text
   *  @throws ArrayIndexOutOfBoundsException if the input ends inside a
   *          `\verb`/`verbatim` section or a `$...$` math region
   */
  def process(buf: Array[Byte], out: OutputStream): Unit = {
    import scala.annotation.tailrec

    /** Write a string to the output, one byte per character. */
    def puts(s: String): Unit = s.foreach(c => out.write(c.toInt))

    /** Write a single input byte to the output. */
    def putc(b: Byte): Unit = out.write(b.toInt)

    /** Character at offset `i`, or NUL when `i` lies outside the buffer.
     *  Used for every lookahead so that peeking past the end is harmless. */
    def peek(i: Int): Char =
      if (0 <= i && i < buf.length) buf(i).toChar else 0.toChar

    /** Character at offset `i`; running off the end of the buffer is an
     *  error here. The exception type is the one a raw array access would
     *  raise, so existing handlers keep working. */
    def current(i: Int): Char =
      if (i < buf.length) buf(i).toChar
      else throw new ArrayIndexOutOfBoundsException(
        "unexpected end of input at offset " + i + " (line " + lineCount + ")")

    /** Do the buffer contents at the given offset start with the given
     *  string? Never reads past the end of the buffer. */
    def startsWith(offset: Int, s: String): Boolean =
      offset + s.length <= buf.length &&
        s.indices.forall(k => buf(offset + k).toChar == s.charAt(k))

    /** Is `c` a character that may continue an identifier? */
    def isIdentChar(c: Char): Boolean =
      ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
        ('0' <= c && c <= '9') || c == '_'

    /** If the buffer at offset `i` holds a keyword that is not followed by
     *  an identifier character, return its index in the `reserved' table;
     *  otherwise return -1. Each entry is tested directly, so the result
     *  does not depend on how the character after the word sorts relative
     *  to letters (this is what made `final|` go unrecognised before). */
    def keyIndex(i: Int): Int =
      reserved.indexWhere { key =>
        startsWith(i, key) && !isIdentChar(peek(i + key.length))
      }

    /** Skip blanks and at most one newline; return following index */
    @tailrec
    def skipLeadingBlanks(i: Int): Int =
      peek(i) match {
        case ' ' => skipLeadingBlanks(i + 1)
        case '\n' =>
          lineCount = lineCount + 1
          i + 1
        case _ => i
      }

    /** Process a `verbatim' or `verb' section.
     *  @param   i    The offset in `buf' where it starts
     *  @param   end  The string delimiting the section at the end
     *  @return  index after the section */
    def processVerbatim(i: Int, end: String): Int = {

      val endChar = end.charAt(0)

      /** Copy a math region up to and including the closing `$';
       *  return next following index. */
      @tailrec
      def processMath(i: Int): Int = {
        val c = current(i)
        putc(buf(i))
        if (c == '$') i + 1 else processMath(i + 1)
      }

      /** Process initial whitespace of line; return next following index.
       *  Every column of indentation is emitted as "~~". */
      def processLeadingWhitespace(start: Int): Int = {
        @tailrec
        def loop(i: Int, col: Int): Int =
          peek(i) match {
            case ' ' =>
              puts("~~")
              loop(i + 1, col + 1)
            case '\t' =>
              // a tab always advances at least one column
              val next = (col / TABINC + 1) * TABINC
              puts("~~" * (next - col))
              loop(i + 1, next)
            case _ =>
              i
          }
        loop(start, 0)
      }

      /** Emit `pre' and then process normal Scala code starting at index `i'.
       *  Return next following index */
      @tailrec
      def processCode(pre: String, i: Int): Int = {
        puts(pre)
        current(i) match {
          // the terminator takes precedence over every other rule
          case c if c == endChar && startsWith(i, end) =>
            i + end.length
          case '\n' =>
            lineCount = lineCount + 1
            if (startsWith(i + 1, end)) processCode("\n", i + 1)
            else {
              puts("\\\\")
              // an empty line becomes extra vertical space
              val lineStart =
                if (peek(i + 1) == '\n') {
                  puts("[0.5em]")
                  lineCount = lineCount + 1
                  i + 2
                } else i + 1
              processCode("", processLeadingWhitespace(lineStart))
            }
          case ' ' =>
            processCode("~", i + 1)
          case '^' =>
            processCode("\\^~$\\!\\!$", i + 1)
          case '&' =>
            processCode("\\&", i + 1)
          case '*' =>
            processCode("$*$", i + 1)
          case '%' =>
            processCode("$\\%$", i + 1)
          case '_' =>
            processCode("\\_", i + 1)
          case '~' =>
            processCode("\\~~$\\!\\!$", i + 1)
          case '{' =>
            processCode("{\\small\\{}", i + 1)
          case '}' =>
            processCode("{\\small\\}}", i + 1)
          case '[' =>
            processCode("$[$", i + 1)
          case ']' =>
            processCode("$]$", i + 1)
          case '(' =>
            processCode("$($", i + 1)
          case ')' =>
            processCode("$)$", i + 1)
          case ':' =>
            // thin space between an identifier and a following colon
            if (i > 0 && Character.isJavaIdentifierPart(buf(i - 1).toChar))
              puts("\\,")
            processCode("{\\rm :}", i + 1)
          case '<' =>
            peek(i + 1) match {
              case '=' => processCode("$\\leq$", i + 2)
              case '-' => processCode("$\\leftarrow$", i + 2)
              case '<' => processCode("$<\\!$", i + 1)
              case _ => processCode("$<$", i + 1)
            }
          case '>' =>
            peek(i + 1) match {
              case '=' => processCode("$\\geq$", i + 2)
              case '>' => processCode("$>\\!$", i + 1)
              case _ => processCode("$>$", i + 1)
            }
          case '=' =>
            peek(i + 1) match {
              case '=' => processCode("$==$", i + 2)
              case '>' => processCode("$\\Rightarrow$", i + 2)
              case _ => processCode("=", i + 1)
            }
          case '/' =>
            if (peek(i + 1) == '/') processCode(lineCommentString + "//", i + 2)
            else processCode("/", i + 1)
          case '-' =>
            if (peek(i + 1) == '>') processCode("$\\rightarrow$", i + 2)
            else processCode("$-$", i + 1)
          case '+' =>
            processCode("$+$", i + 1)
          case '|' =>
            processCode("$\\,|$", i + 1)
          case '#' =>
            processCode("\\#", i + 1)
          case '\\' =>
            // NOTE(review): the usage notes in the file header describe `\\`
            // as yielding a single backslash, but a doubled backslash passes
            // through the fallback case twice and yields two -- confirm
            // which behaviour is intended.
            peek(i + 1) match {
              case '=' => processCode("\\=", i + 2)
              case '>' => processCode("\\>", i + 2)
              case '$' => processCode("\\$", i + 2)
              case 'R' => processCode("\\color{red}", i + 2)
              case 'S' => processCode("\\color{black}", i + 2)
              case 'B' => processCode("\\color{blue}", i + 2)
              case 'G' => processCode("\\color{green}", i + 2)
              case _ => processCode("$\\backslash$", i + 1)
            }
          case '$' =>
            puts("$")
            processCode("", processMath(i + 1))
          case c =>
            // a keyword can only start where no identifier is in progress
            val k =
              if (i == 0 || !Character.isJavaIdentifierPart(buf(i - 1).toChar)) keyIndex(i)
              else -1
            if (k >= 0)
              processCode("{" + reservedString + " " + reserved(k) + "}", i + reserved(k).length)
            else
              processCode(c.toString, i + 1)
        }
      }

      verbLine = lineCount
      processCode("", processLeadingWhitespace(skipLeadingBlanks(i)))
    }

    /** Copy a TeX comment up to (not including) the end of the line;
     *  return next following index */
    @tailrec
    def processTeXComment(i: Int): Int =
      if (i >= buf.length || buf(i).toChar == '\n') i
      else {
        putc(buf(i))
        processTeXComment(i + 1)
      }

    /** Process TeX text; return next following index */
    @tailrec
    def processTex(i: Int): Int = {
      if (i >= buf.length) {
        i
      } else if (buf(i).toChar == '%') {
        processTex(processTeXComment(i))
      } else if (startsWith(i, beginVerbatim)) {
        puts(beginProgramString)
        val j = processVerbatim(i + beginVerbatim.length, endVerbatim)
        puts(endProgramString)
        processTex(j)
      } else if (startsWith(i, verb)) {
        puts(progString + "{")
        val j = i + verb.length
        // the character right after \verb is the section delimiter
        val end = current(j).toString
        val k = processVerbatim(j + 1, end)
        puts("}")
        processTex(k)
      } else {
        if (buf(i).toChar == '\n') lineCount = lineCount + 1
        putc(buf(i))
        processTex(i + 1)
      }
    }

    // begin body of process
    puts("""%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
           |% DO NOT EDIT.  Automatically generated file! %
           |%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
           |""".stripMargin)
    lineCount = 1
    processTex(0)
  }

  /** Read the complete contents of `file`. The stream is read until
   *  end-of-file rather than sized via `available()`, and is closed even
   *  when reading fails.
   */
  private def readFully(file: File): Array[Byte] = {
    val in = new FileInputStream(file)
    try {
      val bytes = new ByteArrayOutputStream()
      val chunk = new Array[Byte](8192)
      var n = in.read(chunk)
      while (n >= 0) {
        bytes.write(chunk, 0, n)
        n = in.read(chunk)
      }
      bytes.toByteArray
    } finally {
      in.close()
    }
  }

  /** The main method.
   *  Expects exactly two arguments: the source file to read and the
   *  destination file to write. Prints a usage message and exits with
   *  status 1 otherwise. When processing fails with a runtime exception,
   *  the line on which the current verbatim section started is reported
   *  before the exception is rethrown.
   *
   *  @param args  `<source-file> <destination-file>`
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      val classname = new Error().getStackTrace()(0).getClassName()
      System.err.println(
        "Usage: " + classname + " <source-file> <destination-file>")
      System.exit(1)
    }
    System.out.print("[verbfilterScala " + args(0) + " " + args(1))
    val buf = readFully(new File(args(0)))
    val out =
      new BufferedOutputStream(
        new FileOutputStream(
          new File(args(1))))
    try {
      process(buf, out)
    } catch {
      case ex: RuntimeException =>
        System.err.println("\n **** error at line " + verbLine)
        throw ex
    } finally {
      // flush and release the destination even when processing failed
      out.close()
    }
    System.out.println("]")
  }
}
 
code/verbatim-filter.txt · Last modified: 2006/11/24 06:43 by 83.135.5.182
 
Recent changes RSS feed Valid XHTML 1.0 Driven by DokuWiki