By Martin Odersky
The following program formats Scala code embedded in LaTeX files. It translates all sections in a LaTeX source which are delimited by
\verb@...@
or by
\begin{verbatim}
...
\end{verbatim}.
The translation maps into environments and commands which are configurable by the user. It highlights keywords and also performs some character substitutions. More details are found in the comment below.
The program is a fairly straightforward adaptation of a Java program. It works only with simple data structures: files and arrays of bytes. Compared to the original program it is nevertheless much nicer, mainly because of the use of nested functions and tail-recursion.
import java.io._
import scala.annotation.tailrec

/** A text formatter for Scala programs embedded in LaTeX sources.
 *
 *  To run it, use either one of the following commands:
 *
 *    java verbfilterScala <in.src> <in.tex>
 *    scala verbfilterScala <in.src> <in.tex>
 *
 *  This translates the file `in.src' into `in.tex'.
 *  The program highlights reserved words and line comments.
 *  Inside a verbatim section it understands the following formatting
 *  commands:
 *
 *    \=   tab set
 *    \>   advance to next set tab
 *    $    toggle math mode
 *    \R   red color, translates to \color{red}
 *    \B   blue color, translates to \color{blue}
 *    \S   black color, translates to \color{black}
 *    \G   green color, translates to \color{green}
 *    \\   a single backslash `\'
 *    \$   a dollar sign `$'
 *
 *  The source file `in.src' needs to contain definitions of the following
 *  LaTeX commands:
 *
 *    \vem          Highlight keyword
 *    \prog{#1}     argument is inline program code
 *    \linecomment  Highlight line comment
 *
 *  It also needs to define the following environment:
 *
 *    program       for multi-line program code.
 *
 *  The program environment needs to refine LaTeX's tabbing environment, so
 *  that \= and \> are defined.
 *
 *  Here are examples of these LaTeX definitions:
 *
 *    \newcommand{\prog}[1]     {\darkbluetext{\sf #1}}
 *    \newcommand{\vem}         {\color{violet}}
 *    \newcommand{\linecomment} {\color{darkgreen}}
 *    \newenvironment{program}  {\begin{quote} \small \darkblue \sf \begin{tabbing}}
 *                              {\end{tabbing} \end{quote}}
 *    \definecolor{darkblue}{rgb}{0, 0, 0.4}
 *    \definecolor{darkgreen}{rgb}{0, 0.3, 0.0}
 */
object verbfilterScala {

  /** Increment for tabulator characters */
  final val TABINC = 8

  /** Output commands, to be defined as LaTeX commands in the source file */
  final val progString = "\\prog"
  final val reservedString = "\\vem"
  final val lineCommentString = "\\linecomment"
  final val beginProgramString = "\\begin{program}"
  final val endProgramString = "\\end{program}"

  /** Strings significant for input */
  final val beginVerbatim = "\\begin{verbatim}"
  final val endVerbatim = "\\end{verbatim}"
  final val verb = "\\verb"

  /** Reserved words. Must stay sorted alphabetically: `keyIndex' inside
   *  `process' does a binary search over this table.
   */
  val reserved = Array(
    "abstract", "case", "catch", "class", "def", "do", "else", "extends",
    "false", "final", "finally", "for", "if", "implicit", "import", "match",
    "mixin", "new", "null", "object", "override", "package", "private",
    "protected", "requires", "return", "sealed", "super", "this", "trait",
    "true", "try", "type", "val", "var", "while", "with", "yield")

  /** Current input line; reset to 1 by `process' and advanced on newlines. */
  var lineCount = 0

  /** Line on which the most recently entered verbatim section starts;
   *  reported by `main' when processing fails.
   */
  var verbLine = 0

  /** The main process method: translates the LaTeX source in `buf' and
   *  writes the result to `out'.
   *
   *  An unterminated verbatim section runs off the end of `buf' and
   *  surfaces as an ArrayIndexOutOfBoundsException; `main' reports it
   *  together with `verbLine'.
   *
   *  @param buf  the complete input file, one byte per character
   *  @param out  the stream receiving the translated text (not closed here)
   */
  def process(buf: Array[Byte], out: OutputStream): Unit = {

    /** Write string to the output. Note the for-comprehension over
     *  strings. This works because strings are implicitly convertible
     *  to sequences.
     */
    def puts(s: String): Unit = {
      for (c <- s) out.write(c)
    }

    /** Write a single byte to the output */
    def putc(c: Byte): Unit = {
      out.write(c)
    }

    /** Do buffer contents at given offset start with given string?
     *  Returns false (instead of indexing past the buffer) when fewer
     *  than `s.length' bytes remain.
     */
    def startsWith(offset: Int, s: String): Boolean = {
      var i = 0
      while (i < s.length && offset + i < buf.length && buf(offset + i) == s.charAt(i))
        i += 1
      i == s.length
    }

    /** Is `b' a letter, digit or underscore (ASCII only)? */
    def isIdentifierByte(b: Byte): Boolean =
      'A' <= b && b <= 'Z' ||
      'a' <= b && b <= 'z' ||
      '0' <= b && b <= '9' ||
      b == '_'

    /** Compare buffer contents at given offset with given string.
     *  Returns a negative number if the buffer sorts before `key', a
     *  positive number if it sorts after it (this includes the case where
     *  `key' is matched but immediately followed by an identifier
     *  character), and 0 if the buffer holds exactly the word `key'.
     */
    def compare(offset: Int, key: String): Int = {
      val l = key.length
      var i = offset
      var j = 0
      var diff = 0
      while (diff == 0 && i < buf.length && j < l) {
        val bch = buf(i).toChar
        val kch = key.charAt(j)
        if (bch < kch) diff = -1
        else if (bch > kch) diff = 1
        else { i += 1; j += 1 }
      }
      if (diff != 0) diff
      else if (j < l) -1 // buffer ended before the key did
      else if (i < buf.length && isIdentifierByte(buf(i))) 1 // longer identifier
      else 0
    }

    /** If buffer at offset `i' is a keyword, return its index
     *  in the `reserved' table; otherwise return -1
     */
    def keyIndex(i: Int): Int = {
      @tailrec def search(lo: Int, hi: Int): Int =
        if (lo > hi) -1
        else {
          val mid = (hi + lo) / 2
          val diff = compare(i, reserved(mid))
          if (diff < 0) search(lo, mid - 1)
          else if (diff > 0) search(mid + 1, hi)
          else mid
        }
      search(0, reserved.length - 1)
    }

    /** Skip blanks and at most one newline; return following index */
    @tailrec def skipLeadingBlanks(i: Int): Int =
      if (buf(i) == ' ') skipLeadingBlanks(i + 1)
      else if (buf(i) == '\n') { lineCount += 1; i + 1 }
      else i

    /** Process a `verbatim' or `verb' section.
     *  @param i   The offset in `buf' where it starts
     *  @param end The string delimiting the section at the end
     *  @return index after the section
     */
    def processVerbatim(i: Int, end: String): Int = {
      // First character of the delimiter; cheap pre-test in `processCode'.
      val END = end.charAt(0)

      /** Process line comment starting at `i'; return next following index.
       *  Note: `processCode' does not call this; it is kept from the
       *  original program.
       */
      @tailrec def processLineComment(i: Int): Int =
        if (buf(i) == '\n' || startsWith(i, end)) i
        else { putc(buf(i)); processLineComment(i + 1) }

      /** Process math region starting at `i': copy bytes unchanged up to
       *  and including the closing `$'; return next following index.
       */
      @tailrec def processMath(i: Int): Int = {
        putc(buf(i))
        if (buf(i) == '$') i + 1 else processMath(i + 1)
      }

      /** Process initial whitespace of line; return next following index.
       *  Every column of indentation is emitted as "~~"; a tab advances
       *  to the next multiple of TABINC.
       */
      def processLeadingWhitespace(i: Int): Int = {
        @tailrec def loop(i: Int, col: Int): Int =
          if (buf(i) == ' ') {
            puts("~~")
            loop(i + 1, col + 1)
          } else if (buf(i) == '\t') {
            var c = col
            do { puts("~~"); c += 1 } while (c % TABINC != 0)
            loop(i + 1, c)
          } else {
            i
          }
        loop(i, 0)
      }

      /** Emit `pre' and then process normal Scala code starting at index `i'.
       *  Return next following index
       */
      @tailrec def processCode(pre: String, i: Int): Int = {
        puts(pre)
        // Matching on the Char value keeps every comparison below identical
        // to comparing the raw byte: non-ASCII bytes match no case but `_'.
        buf(i).toChar match {
          case END if startsWith(i, end) =>
            i + end.length
          case '\n' =>
            lineCount += 1
            if (startsWith(i + 1, end)) processCode("\n", i + 1)
            else {
              puts("\\\\")
              // An empty line becomes extra vertical space.
              val next =
                if (buf(i + 1) == '\n') {
                  puts("[0.5em]")
                  lineCount += 1
                  i + 2
                } else i + 1
              processCode("", processLeadingWhitespace(next))
            }
          case ' ' => processCode("~", i + 1)
          case '^' => processCode("\\^~$\\!\\!$", i + 1)
          case '&' => processCode("\\&", i + 1)
          case '*' => processCode("$*$", i + 1)
          case '%' => processCode("$\\%$", i + 1)
          case '_' => processCode("\\_", i + 1)
          case '~' => processCode("\\~~$\\!\\!$", i + 1)
          case '{' => processCode("{\\small\\{}", i + 1)
          case '}' => processCode("{\\small\\}}", i + 1)
          case '[' => processCode("$[$", i + 1)
          case ']' => processCode("$]$", i + 1)
          case '(' => processCode("$($", i + 1)
          case ')' => processCode("$)$", i + 1)
          case ':' =>
            // Thin space between an identifier and a following colon.
            if (i > 0 && Character.isJavaIdentifierPart(buf(i - 1).toChar))
              puts("\\,")
            processCode("{\\rm :}", i + 1)
          case '<' =>
            if (buf(i + 1) == '=') processCode("$\\leq$", i + 2)
            else if (buf(i + 1) == '-') processCode("$\\leftarrow$", i + 2)
            else if (buf(i + 1) == '<') processCode("$<\\!$", i + 1)
            else processCode("$<$", i + 1)
          case '>' =>
            if (buf(i + 1) == '=') processCode("$\\geq$", i + 2)
            else if (buf(i + 1) == '>') processCode("$>\\!$", i + 1)
            else processCode("$>$", i + 1)
          case '=' =>
            if (buf(i + 1) == '=') processCode("$==$", i + 2)
            else if (buf(i + 1) == '>') processCode("$\\Rightarrow$", i + 2)
            else processCode("=", i + 1)
          case '/' =>
            if (buf(i + 1) == '/') processCode(lineCommentString + "//", i + 2)
            else processCode("/", i + 1)
          case '-' =>
            if (buf(i + 1) == '>') processCode("$\\rightarrow$", i + 2)
            else processCode("$-$", i + 1)
          case '+' => processCode("$+$", i + 1)
          case '|' => processCode("$\\,|$", i + 1)
          case '#' => processCode("\\#", i + 1)
          case '\\' =>
            if (buf(i + 1) == '=') processCode("\\=", i + 2)
            else if (buf(i + 1) == '>') processCode("\\>", i + 2)
            else if (buf(i + 1) == '$') processCode("\\$", i + 2)
            else if (buf(i + 1) == 'R') processCode("\\color{red}", i + 2)
            else if (buf(i + 1) == 'S') processCode("\\color{black}", i + 2)
            else if (buf(i + 1) == 'B') processCode("\\color{blue}", i + 2)
            else if (buf(i + 1) == 'G') processCode("\\color{green}", i + 2)
            // `\\' is the documented escape for one backslash; consuming both
            // characters is what makes a literal `\R', `\=' etc. writable.
            else if (buf(i + 1) == '\\') processCode("$\\backslash$", i + 2)
            else processCode("$\\backslash$", i + 1)
          case '$' =>
            puts("$")
            processCode("", processMath(i + 1))
          case _ =>
            // Only look for a keyword at the start of a word.
            if (i == 0 || !Character.isJavaIdentifierPart(buf(i - 1).toChar)) {
              val k = keyIndex(i)
              if (k >= 0)
                processCode("{" + reservedString + " " + reserved(k) + "}",
                            i + reserved(k).length)
              else
                processCode(buf(i).toChar.toString, i + 1)
            } else
              processCode(buf(i).toChar.toString, i + 1)
        }
      }

      verbLine = lineCount
      processCode("", processLeadingWhitespace(skipLeadingBlanks(i)))
    }

    /** Process a TeX comment: copy it unchanged up to (not including) the
     *  end of line; return next following index
     */
    @tailrec def processTeXComment(i: Int): Int =
      if (i == buf.length || buf(i) == '\n') i
      else { putc(buf(i)); processTeXComment(i + 1) }

    /** Process TeX text; return next following index */
    @tailrec def processTex(i: Int): Int = {
      if (i == buf.length) {
        i
      } else if (buf(i) == '%') {
        processTex(processTeXComment(i))
      } else if (startsWith(i, beginVerbatim)) {
        puts(beginProgramString)
        val j = processVerbatim(i + beginVerbatim.length, endVerbatim)
        puts(endProgramString)
        processTex(j)
      } else if (startsWith(i, verb)) {
        puts(progString + "{")
        val j = i + verb.length
        // The character following \verb is the section delimiter.
        val end = new String(Array(buf(j).toChar))
        val k = processVerbatim(j + 1, end)
        puts("}")
        processTex(k)
      } else {
        if (buf(i) == '\n') lineCount += 1
        putc(buf(i))
        processTex(i + 1)
      }
    }

    // begin body of process
    puts("""%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
           |% DO NOT EDIT. Automatically generated file! %
           |%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
           |""".stripMargin)
    lineCount = 1
    processTex(0)
  }

  /** Read the complete contents of `file'. Reads until end of stream
   *  because a single `read' call is not guaranteed to fill its buffer.
   */
  private def readAll(file: File): Array[Byte] = {
    val in = new FileInputStream(file)
    try {
      val bytes = new ByteArrayOutputStream()
      val chunk = new Array[Byte](8192)
      var n = in.read(chunk)
      while (n >= 0) {
        bytes.write(chunk, 0, n)
        n = in.read(chunk)
      }
      bytes.toByteArray
    } finally {
      in.close()
    }
  }

  /** The main method: translates the file named by `args(0)' into the
   *  file named by `args(1)'.
   *
   *  Prints a usage message and exits with status 1 unless exactly two
   *  arguments are given. If processing fails with a RuntimeException,
   *  the line of the verbatim section being processed is reported on
   *  standard error and the exception is rethrown. The output file is
   *  closed in either case.
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      val classname = new Error().getStackTrace()(0).getClassName()
      System.err.println(
        "Usage: " + classname + " <source-file> <destination-file>")
      System.exit(1)
    }
    System.out.print("[verbfilterScala " + args(0) + " " + args(1))
    val buf = readAll(new File(args(0)))
    val out = new BufferedOutputStream(
      new FileOutputStream(
        new File(args(1))))
    try {
      process(buf, out)
    } catch {
      case ex: RuntimeException =>
        System.err.println("\n **** error at line " + verbLine)
        throw ex
    } finally {
      out.close()
    }
    System.out.println("]")
  }
}