At my office we’ve got a wide (many projects) and large (tons of stuff) Subversion repository. The svn tool can take upwards of five minutes to perform an update against a branch of the repository. The svn client does a nice job of emitting XML output if you request it, so I wrote this utility to accelerate the update process for repositories that have the same general shape as ours.
It works by checking the workspace’s revision number, then reading the log entries that follow that revision. The paths of the changes are parsed out of those entries, and Subversion updates are then performed only in the subdirectories that require them.
package com.mosol.svu {

  /**
   * Performs a fast update of a Subversion working copy by examining log
   * entries that are more recent than the workspace's recorded revision
   * number. The set of top level directories containing updates is
   * determined, and an update is issued against those directories only. A
   * non-recursive update is then issued against the root so the workspace
   * revision is updated properly.
   *
   * Usage: `svu [-v] [directory]`. `-v` turns on verbose output; any argument
   * that names an existing directory becomes the directory svn is run in.
   * Other arguments are ignored.
   *
   * This code is in the public domain.
   *
   * @author Ross Judson
   */
  object svu {
    import java.net.URI
    import java.io.{File}
    import Console._
    import scala.xml._
    import iostream.IOStream._

    val VERSION: String = "0.21"
    val NF: java.text.NumberFormat = java.text.NumberFormat.getInstance()

    /** Set by the `-v` command line switch. */
    var verbose: Boolean = false

    /** Directory svn is run in; `None` means the JVM's current directory. */
    var root: Option[File] = None

    /** Accumulated wall-clock milliseconds spent inside svn invocations. */
    var svnTime: Long = 0L

    /** Entry point: interprets the arguments, then performs the quick update. */
    def main(args: Array[String]): Unit = {
      arg(args.toList)
      run()
    }

    /** Walks the argument list, recording `-v` and the last existing directory named. */
    @scala.annotation.tailrec
    private def arg(a: List[String]): Unit = a match {
      case Nil => ()
      case "-v" :: tail =>
        verbose = true
        arg(tail)
      case head :: tail if new File(head).isDirectory =>
        root = Some(new File(head))
        arg(tail)
      case _ :: tail =>
        arg(tail)
    }

    /** Print something. */
    private def p[T](t: T): Unit = println(t)

    /** Print something if verbose is on. */
    private def pv[T](t: T): Unit = if (verbose) println(t)

    /** Performs the whole quick update: inspect workspace, read log, update changed folders. */
    private def run(): Unit = {
      print("SVU Subversion Quick Update by Ross Judson; version " + VERSION +
        "\nChecking workspace revision...")

      // Execute an "svn info" in the working directory so we can find out
      // about the workspace we're in. The result is an XML element; an
      // xpath-like search identifies the first "entry" element.
      val workspaceEntry = firstNode(svn("info") \\ "entry", "an <entry> element")
      println()

      // Determine the revision number the workspace is currently at.
      val rev = revision(workspaceEntry)

      // Determine the url for the workspace: the text of the first child of
      // the workspace entry tagged "url".
      val url = firstNode(workspaceEntry \ "url", "a <url> element").text

      // Determine the repository's root url by looking for a "repository"
      // tag, then looking inside that for a "root" tag.
      val repositoryRoot =
        firstNode(workspaceEntry \ "repository" \ "root", "a <repository>/<root> element").text

      // The prefix we'll be removing from log entry paths.
      // NOTE(review): svn reports URLs percent-encoded while log paths are
      // not, so a branch path containing escaped characters would presumably
      // never match this prefix -- confirm before relying on it.
      val prefix = url.substring(repositoryRoot.length) + "/"

      // Ask Subversion for the log entries between our workspace revision and
      // the head of the branch.
      val log = svn("log", "-r", "HEAD:" + rev, "-v")

      // Scan the log to find the latest revision number by folding every log
      // entry's revision through a maximum, starting from our own revision.
      val latest =
        (log \\ "logentry").foldLeft(rev)((newest, e) => math.max(revision(e), newest))

      pv("Repository root: " + repositoryRoot)
      pv("Prefix: " + prefix)
      println("Workspace at rev. " + rev + "; repository rev. " + latest + "; " + url)

      // Identify the top-most folder of every changed path below the prefix.
      val checkins = paths(log, rev, prefix).toList.map(n => topFolder(prefix.length)(n))

      // Remove duplicates from the checkin folder list.
      val pathways = checkins.distinct

      // Determine what needs to be done.
      pathways match {
        case Nil =>
          println("No updates needed.")
        case pset =>
          println("Revisions checked in: " + checkins.length)
          println("Updates required to:")
          pset.foreach(d => println("\t" + d))

          // Execute an update on the directories that need it.
          print("Updating...")
          svnNonXML(("up" :: pset): _*)

          // Do a non-recursive update on the root directory only, which will
          // bring the apparent revision of the workspace up to date.
          print("done\nUpdating root...")
          svnNonXML("up", "-N")
          println("Done in " + NF.format(svnTime / 1000.0) + "s")
      }
    }

    /**
     * Returns the first node of `nodes`, or fails with a message naming what
     * was expected instead of a bare IndexOutOfBoundsException.
     */
    private def firstNode(nodes: NodeSeq, what: String): Node =
      nodes.headOption.getOrElse(
        throw new IllegalStateException(
          "svn output did not contain " + what + "; is this a Subversion working copy?"))

    /** First path segment of `p`'s text once the first `pl` characters are dropped. */
    private def topFolder(pl: Int)(p: NodeSeq): String =
      p.text.substring(pl).split("/")(0)

    /** The "revision" attribute of `entry` as a number, or 0 when it is absent. */
    private def revision(entry: Node): Int = intAttr(entry, "revision", 0)

    /**
     * Reads attribute `name` of `e` as an integer.
     *
     * @return the parsed value, or `defaultValue` when the attribute is absent
     * @throws NumberFormatException when the attribute is present but not numeric
     */
    private def intAttr(e: Node, name: String, defaultValue: Int): Int =
      e.attribute(name)
        .map(value => Integer.parseInt(value.map(_.text).mkString))
        .getOrElse(defaultValue)

    /**
     * All "path" elements below the log entries newer than `earliest` whose
     * text starts with `prefix`.
     */
    private def paths(log: Elem, earliest: Int, prefix: String) =
      for {
        entry <- log \ "logentry"
        if revision(entry) > earliest
        path <- entry \\ "path"
        if path.text.startsWith(prefix)
      } yield path

    /** Execute a Subversion command in XML mode and parse its output. */
    private def svn(cmd: String*): Elem =
      time { parse(procBuilder(true, cmd: _*).start()) }

    /** Non-XML svn command execution (the output is passed through, not parsed). */
    private def svnNonXML(cmd: String*): Unit =
      time { normalOutput(procBuilder(false, cmd: _*).start()) }

    /** Builds `svn <cmd...> [--xml]`, run in `root` when one was given. */
    private def procBuilder(xml: Boolean, cmd: String*): ProcessBuilder = {
      val argv: List[String] = "svn" :: cmd.toList ::: (if (xml) List("--xml") else Nil)
      rooted(stringSeqToProcBuilder(argv))
    }

    /**
     * Time the execution of any block of code, then add that time to the
     * svnTime variable (also when the block throws).
     */
    private def time[T](block: => T): T = {
      val started = System.currentTimeMillis()
      try block
      finally svnTime += System.currentTimeMillis() - started
    }

    /** Points the process builder at `root` when a root directory was supplied. */
    private def rooted(pb: ProcessBuilder): ProcessBuilder = root match {
      case Some(rt) => pb.directory(rt)
      case _ => pb
    }
  }
}

package iostream {
  import java.io.{FileInputStream, ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream}

  /** Helpers for running external processes and draining their output streams. */
  object IOStream {
    import scala.language.implicitConversions
    import scala.xml._

    /** Sink that throws away everything written to it. */
    private object Discard extends OutputStream {
      override def write(b: Int): Unit = ()
      override def write(b: Array[Byte], off: Int, len: Int): Unit = ()
    }

    /**
     * Copies `in` to `cap`, looping until there isn't any more, then closes
     * `in`. A `null` sink is accepted and means "discard the data".
     */
    def capture(in: InputStream, cap: OutputStream): Unit =
      new InStream(in).copy(Option(cap).getOrElse(Discard))

    // implicit def fileToInputStream(f: java.io.File): InputStream = new FileInputStream(f)

    /** Gather the standard output of a process and parse it as XML. */
    def parse(p: Process): Elem = XML.load(wrap(p))

    /**
     * Wrap a process so we can capture its output: standard output is
     * buffered in memory and returned as a stream, standard error is drained
     * and discarded so the child cannot block on a full pipe.
     */
    def wrap(p: Process): ByteArrayInputStream = {
      val cap = new ByteArrayOutputStream()
      join(
        { capture(p.getInputStream(), cap) },
        { capture(p.getErrorStream(), Discard) })
      // Both pipes are at EOF; wait so the child has really finished.
      p.waitFor()
      new ByteArrayInputStream(cap.toByteArray())
    }

    /** Runs the process to completion, passing its output and errors through. */
    def normalOutput(p: Process): Unit = {
      join(
        { capture(p.getInputStream(), System.out) },
        { capture(p.getErrorStream(), System.err) })
      // Both pipes are at EOF; wait so the child has really finished.
      p.waitFor()
    }

    /** Execute two blocks in parallel, waiting until they're both done. */
    def join(a: => Unit, b: => Unit): Unit = {
      val first = spawn(a)
      val second = spawn(b)
      first.join()
      second.join()
    }

    /** Waits for every given thread to finish. */
    def join(threads: Thread*): Unit = threads.foreach(_.join())

    /** Spins up a thread to run the given block and returns the started thread. */
    def spawn(b: => Unit): Thread = {
      // Start only after construction so `this` never escapes a half-built Thread.
      val worker = new Thread(new Runnable { def run(): Unit = b })
      worker.start()
      worker
    }

    /** Starts the process described by `pb`. */
    implicit def procBuilderToProcess(pb: ProcessBuilder): Process = pb.start()

    /** Builds a process description from a command line given as a sequence. */
    implicit def stringSeqToProcBuilder[S <: Seq[String]](s: S): ProcessBuilder =
      new ProcessBuilder(s: _*)

    /** Starts the command line given as a sequence. */
    implicit def stringSeqToProcess[S <: Seq[String]](s: S): Process =
      stringSeqToProcBuilder(s).start()
  }

  /** Outcome of one read attempt on an [[InStream]]. */
  abstract class IOStatus

  /** The underlying stream reported end of input. */
  case object Finished extends IOStatus

  /** The read returned zero bytes. */
  case object Blocked extends IOStatus

  /**
   * `bytes` bytes were read into `buffer`. The buffer is the reader's own, so
   * the data is only valid until the next read.
   */
  case class Transfer(bytes: Int, buffer: Array[Byte]) extends IOStatus {
    /** Writes the bytes that were read to `out`. */
    def write(out: OutputStream): Unit = out.write(buffer, 0, bytes)
  }

  /**
   * Chunked reader over an input stream.
   *
   * @param in     stream to read from
   * @param buffer scratch buffer every read fills; reused between reads
   */
  class InStream(val in: InputStream, var buffer: Array[Byte]) {
    /** Milliseconds to sleep after a read that returned zero bytes. */
    var blockSleep = 50

    /** Reader with a 4096 byte buffer. */
    def this(_in: InputStream) = this(_in, new Array[Byte](4096))

    /** Performs one read and reports what happened. */
    def apply(): IOStatus = {
      val read = in.read(buffer)
      if (read > 0) Transfer(read, buffer)
      else if (read == 0) Blocked
      else Finished
    }

    /** Closes the underlying stream. */
    def close(): Unit = in.close()

    /**
     * Feeds every chunk to `f` until end of input, sleeping `blockSleep`
     * milliseconds after empty reads. The stream is closed afterwards, also
     * when reading or `f` fails.
     */
    def foreach(f: Transfer => Unit): Unit = {
      // A loop rather than recursion: one stack frame per chunk overflows the
      // stack on long outputs that arrive in small pieces.
      try {
        var finished = false
        while (!finished) {
          apply() match {
            case Finished => finished = true
            case Blocked => Thread.sleep(blockSleep)
            case t: Transfer => f(t)
          }
        }
      } finally {
        close()
      }
    }

    /** Returns this reader unchanged; the predicate is ignored. */
    def filter(f: Transfer => Boolean): InStream = this

    /** Copies everything that can be read to `out`, then closes the input. */
    def copy(out: OutputStream): Unit = foreach(_.write(out))
  }
}