Parsing XML output from subversion

At my office we’ve got a wide (many projects) and large (tons of stuff) Subversion repository. The svn tool can take upwards of five minutes to perform an update against a branch of the repository. Fortunately, svn does a nice job of emitting XML output if you request it (with the --xml switch), so I wrote this utility to accelerate the update process for repositories that have the same general shape as ours.

It works by checking the workspace’s revision number, then reading log entries that follow that number. The paths of the changes are parsed out, then subversion updates are performed only in those subdirectories that require them.

package com.mosol.svu;
 
/** Performs a fast update of a subversion workspace by examining log entries
that are more recent than the workspace's recorded revision number.  The set of
top level directories containing updates is determined, and an update is issued
against those directories only.  A non-recursive update is then issued against the
root so the workspace revision is updated properly.

Command line arguments: "-v" turns on verbose output; any argument that names an
existing directory is used as the directory svn is run in (the last one wins);
every other argument is ignored.

This code is in the public domain.
@author Ross Judson
*/
object svu {

  import java.net.URI
  import java.io.{File}
  import Console._
  import scala.xml._
  import iostream.IOStream._

  val VERSION = "0.21"
  val NF = java.text.NumberFormat.getInstance()

  /** Set by the "-v" argument; enables the messages printed through pv. */
  var verbose = false
  /** Directory svn is executed in; None leaves the process builder's directory untouched. */
  var root: Option[File] = None
  /** Total milliseconds spent inside blocks wrapped by time, i.e. in svn calls. */
  var svnTime = 0L

  /** Entry point: interprets the arguments, then performs the update. */
  def main(args: Array[String]): Unit = {
    arg(args.toList)
    run
  }

  /** Consumes the argument list one element at a time, recording "-v" and
  any existing directory; anything else is skipped without comment. */
  private def arg(a: List[String]): Unit = a match {
    case Nil => {}
    case "-v" :: tail =>
      verbose = true
      arg(tail)
    case head :: tail if (new File(head).isDirectory) =>
      root = Some(new File(head))
      arg(tail)
    case _ :: tail => arg(tail)
  }

  /** Print something. */
  private def p[T](t: T) = println(t)

  /** Print something if verbose is on. */
  private def pv[T](t: T) = if (verbose) println(t)

  /** Asks svn about the workspace and, when it answers with at least one
  entry, hands the first entry to update. */
  private def run: Unit = {
    print("SVU Subversion Quick Update by Ross Judson; version " + VERSION + "\nChecking workspace revision...")

    // execute an "svn info" so we can find out about the workspace we're in.
    // the result is an xml element, which we search xpath-style for the
    // entry elements.
    val entries = (svn("info") \\ "entry").toList
    println

    entries match {
      case Nil =>
        // indexing an empty result would only produce a bare
        // IndexOutOfBoundsException, so say what went wrong instead.
        println("svn info returned no entry; is this a subversion workspace?")
      case workspaceEntry :: _ =>
        update(workspaceEntry)
    }
  }

  /** Works out which top level folders changed since the workspace revision
  and updates exactly those, followed by a non-recursive update of the root.
  @param workspaceEntry the "entry" element reported by svn info */
  private def update(workspaceEntry: Node): Unit = {
    // determine the revision number the workspace is currently at
    val rev = revision(workspaceEntry)

    // determine the url for the workspace by scanning the children
    // of the workspaceEntry for the first element tagged as "url".
    val url = (workspaceEntry \ "url")(0).text

    // determine the repository's root url by looking for a "repository" tag,
    // then looking inside that for a "root" tag.
    val repositoryRoot = (workspaceEntry \ "repository" \ "root")(0).text

    // find the prefix we'll be removing from log entries. the url is expected
    // to be percent-encoded while the paths in the log are not, so the
    // workspace-relative part is decoded before it is used for comparison.
    val prefix = decodedPath(url.substring(repositoryRoot.length)) + '/'

    // ask subversion for the log entries between our workspace revision
    // and the head of the branch.
    val log = svn("log", "-r", "HEAD:" + rev, "-v")

    // scan the log to find the latest revision number, starting the
    // maximum at the workspace revision.
    val latest = (log \\ "logentry").foldLeft(rev)((r, e) => Math.max(revision(e), r))

    pv("Repository root: " + repositoryRoot)
    pv("Prefix: " + prefix)
    println("Workspace at rev. " + rev + "; repository rev. " + latest + "; " + url)

    // identify the top-most folder of every changed path. note that
    // topFolder is a partially applied function, primed with the
    // length of the prefix we want to remove.
    val checkins = paths(log, rev, prefix).toList.map(topFolder(prefix.length))

    // remove duplicates from the checkin folder list
    val pathways = checkins.removeDuplicates.toList

    // determine what needs to be done
    pathways match {
      case Nil =>
        println("No updates needed.")
      case pset =>
        // checkins holds one element per changed path, not per revision
        println("Paths changed: " + checkins.length)
        println("Updates required to:")
        pset foreach (d => println('\t' + d))

        // execute an update on the directories that need it
        print("Updating...")
        svnNonXML(("up" :: pset): _*)

        // do a non-recursive update on the root directory
        // only, which will bring the apparent revision of the
        // workspace up to date.
        print("done\nUpdating root...")
        svnNonXML("up", "-N")

        println("Done in " + NF.format(svnTime / 1000.0) + 's')
    }
  }

  /** Decodes the percent-escapes of a url path; text that cannot be parsed
  as a URI, or that has no path component, is returned unchanged. */
  private def decodedPath(encoded: String): String =
    try {
      val path = new URI(encoded).getPath
      if (path == null) encoded else path
    } catch {
      case _: java.net.URISyntaxException => encoded
    }

  /** Drops the first pl characters of the path text and returns what precedes
  the next "/". */
  private def topFolder(pl: Int)(p: NodeSeq) = p.text.substring(pl).split("/")(0)

  /** Reads the "revision" attribute of the given node, 0 when it is absent. */
  private def revision(entry: Node): Int = entry.intAttr("revision", 0)

  /** Collects the "path" elements of all log entries newer than earliest
  whose text starts with prefix. */
  private def paths(log: Elem, earliest: Int, prefix: String) =
    for (entry <- log \ "logentry" if revision(entry) > earliest;
         path <- entry \\ "path" if path.text.startsWith(prefix)
         ) yield path

  // Execute a subversion command, in XML mode. The process builder is turned
  // into a running process by an implicit conversion imported from IOStream.
  private def svn(cmd: String*): Elem = time { parse(procBuilder(true, cmd: _*)) }

  // Non-XML svn command execution (we don't parse the output)
  private def svnNonXML(cmd: String*): Unit = time { normalOutput(procBuilder(false, cmd: _*)) }

  /** Builds the svn command line, appending "--xml" on request. The list is
  converted to a ProcessBuilder by an implicit conversion imported from IOStream. */
  private def procBuilder(xml: Boolean, cmd: String*) = {
    val xmlFlag: List[String] = if (xml) List("--xml") else Nil
    val commandLine: List[String] = "svn" :: cmd.toList ::: xmlFlag
    rooted(commandLine)
  }

  /** Time the execution of any block of code, then
  add that time to the svnTime variable. */
  private def time[T](block: => T): T = {
    val now = System.currentTimeMillis()
    val ret = block
    svnTime = svnTime + System.currentTimeMillis() - now
    ret
  }

  /** Points the process builder at the configured root directory, if any. */
  private def rooted(pb: ProcessBuilder) = root match {
    case Some(rt) => pb.directory(rt)
    case _ => pb
  }

  private implicit def elem2Rich(e: Node) = new RichElem(e)

  /** Adds integer attribute access to xml nodes. */
  private class RichElem(e: Node) {
    /** Parses the named attribute as a decimal integer, or returns
    defaultValue when the node has no such attribute. */
    def intAttr(name: String, defaultValue: Int): Int = e.attributes.get(name) match {
      case Some(ts) => Integer.parseInt(ts.text)
      case None => defaultValue
    }
  }
}
 
package iostream;
 
import java.io.{FileInputStream,ByteArrayInputStream,ByteArrayOutputStream,InputStream,OutputStream}
 
/** Helpers for running external processes and draining their output. */
object IOStream {

  import scala.xml._

  /** Output stream that throws away everything written to it. */
  private object Discard extends OutputStream {
    override def write(b: Int): Unit = ()
    override def write(b: Array[Byte], off: Int, len: Int): Unit = ()
  }

  // captures the output of the given stream, looping until
  // there isn't any more. a null sink means "discard the output".
  def capture(in: InputStream, cap: OutputStream): Unit =
    new InStream(in).copy(if (cap == null) Discard else cap)
  // implicit def fileToInputStream(f: java.io.File): InputStream = new FileInputStream(f)

  // Gather the output of a process into XML
  def parse(p: Process): Elem = XML.load(wrap(p))

  // Wrap a process so we can capture its output. Standard output is buffered
  // in memory and handed back as a stream; standard error is drained and
  // dropped. Writing stderr to a null sink used to raise a
  // NullPointerException in the draining thread as soon as the process
  // printed anything there.
  def wrap(p: Process): ByteArrayInputStream = {
    val cap = new ByteArrayOutputStream()
    join( { capture(p.getInputStream(), cap) }, { capture(p.getErrorStream(), Discard) })
    new ByteArrayInputStream(cap.toByteArray())
  }

  // Copy the process's standard output and standard error to our own.
  def normalOutput(p: Process): Unit = {
    join( { capture(p.getInputStream(), System.out) }, { capture(p.getErrorStream(), System.err) })
  }

  // execute blocks in parallel, waiting until they're all done
  def join(a: => Unit, b: => Unit): Unit = join(spawn(a), spawn(b))
  def join(threads: Thread*): Unit = {
    for (t <- threads) t.join()
  }

  // spins up a thread to run the given block. the thread is started only
  // after it has been fully constructed, rather than from within its own
  // constructor.
  def spawn(b: => Unit): Thread = {
    val worker = new Thread {
      override def run() = b
    }
    worker.start()
    worker
  }

  implicit def procBuilderToProcess(pb: ProcessBuilder): Process = pb.start()
  implicit def stringSeqToProcBuilder[b <: Seq[String]](s: b): ProcessBuilder = new ProcessBuilder(s.toArray)
  implicit def stringSeqToProcess[b <: Seq[String]](s: b): Process = stringSeqToProcBuilder(s)
}
 
/** Outcome of one read attempt on an InStream. Sealed so that matches over
the three outcomes are checked for exhaustiveness by the compiler. */
sealed abstract class IOStatus
/** The read reported end of input (a negative count). */
case object Finished extends IOStatus
/** The read returned without delivering any bytes. */
case object Blocked extends IOStatus
/** A successful read: the first `bytes` elements of `buffer` hold the data. */
case class Transfer(bytes: Int, buffer: Array[Byte]) extends IOStatus {
  /** Writes the first `bytes` elements of the buffer to `out`. */
  def write(out: OutputStream): Unit = out.write(buffer, 0, bytes)
}
 
/** Reads an input stream chunk by chunk through a reusable buffer.
@param in     the stream to read from
@param buffer the array every read fills; its contents are overwritten by
              the next read, so a Transfer must be consumed before reading again */
class InStream(val in: InputStream, var buffer: Array[Byte]) {
  /** Milliseconds to wait before retrying after a read that delivered nothing. */
  var blockSleep = 50

  /** Creates a stream reader with a 4096 byte buffer. */
  def this(_in: InputStream) = this(_in, new Array[Byte](4096))

  /** Performs a single read and classifies its result: a positive count is a
  Transfer, zero is Blocked, and a negative count is Finished. */
  def apply(): IOStatus = {
    val read = in.read(buffer)
    if (read > 0)
      Transfer(read, buffer)
    else if (read == 0)
      Blocked
    else
      Finished
  }

  /** Closes the underlying stream. */
  def close(): Unit = in.close()

  /** Feeds every chunk to f until the stream is finished, then closes the
  stream. Implemented as a loop so the stack depth does not grow with the
  number of chunks, and the stream is closed even when reading or f fails. */
  def foreach(f: Transfer => Unit): Unit = {
    try {
      var finished = false
      while (!finished) {
        apply() match {
          case Finished => finished = true
          case Blocked => Thread.sleep(blockSleep)
          case t: Transfer => f(t)
        }
      }
    } finally {
      close()
    }
  }

  /** Ignores the predicate and returns this stream unchanged.
  NOTE(review): presumably only here so InStream can appear in
  for-comprehensions with guards; confirm before relying on it. */
  def filter(f: Transfer => Boolean) = this

  /** Copies everything that remains in the stream to out, then closes the stream. */
  def copy(out: OutputStream): Unit = foreach(t => t.write(out))
}
 
 
code/subversion-fast-update.txt · Last modified: 2008/02/07 15:42 by 207.152.147.19
 
Recent changes RSS feed Valid XHTML 1.0 Driven by DokuWiki