#!/usr/bin/perl
#
# Mark Doll, 2006.
#
# This is a perl implementation of TeXdiff, derived from the original bash and
# perl scripts created by Robert Maron, available at
# http://www.robmar.net/TexDiff/. This version of texdiff is available at
# http://mark.doll.name/texdiff/.
#
# usage: texdiff old.tex new.tex [diff.tex]
#
# requires the following LaTeX code in the preamble of the LaTeX document:
#
# \usepackage{color} \usepackage{ulem} \usepackage{changebar}
# \newcommand\TLSins[1]{\cbstart{}\textcolor{ins}{\uline{#1}}\cbend{}}
# \newcommand\TLSdel[1]{\cbdelete{}\textcolor{del}{\sout{#1}}}
#
# These macros are NOT automatically inserted at \begin{document} like
# the original scripts did it, because this will not work on files of
# a multi-file document (those included by \include or \input).
# Furthermore, if you insert the macros manually, you can tune them as
# you like, i. e. change text color or position of changebars. Have a
# look at the documentation of the ulem and changebar packages on how
# to do this!
#

use strict;

## Problematic commands, that will be removed from deletions and moved
## out of insertions. The array index equals the number of mandatory
## command parameters.

## INSERT YOUR HOMEBREW MACROS HERE!!!
# Omit leading backslash and separate by '|', since this will be
# interpreted as a regular expression.
# One slot per number of mandatory parameters; leave a slot as the empty
# string when no homebrew macro takes that many parameters.
my @mycommands = (
    '',                             # commands with 0 mandatory parameters
    '',                             # commands with 1 mandatory parameter
    '',                             # commands with 2 mandatory parameters
    '',                             # commands with 3 mandatory parameters
    '',                             # commands with 4 mandatory parameters
    'Abb(?:ildung)?(?:p[psw])?',    # commands with 5 mandatory parameters
);

## standard LaTeX macros
#FIXME: add more macros here!
# Same layout as @mycommands is used further down: the loop over $num reads
# $mycommands[$num] and $stdcommands[$num] side by side and joins them
# with '|'.
my @stdcommands = (
  '\\\\', # will match one backslash inside regex
);

## allow nesting of braces (aka curly brackets) up to 10**$w-1 levels
# $w is the number of digits appended to every brace by write_temp().
my $w = 2;

## wdiff marker
# Plain-text tokens handed to wdiff as start/end markers; the substitutions
# below search for them in wdiff's output.
my $delstart = "TLSdel";
my $delend = "TLEdel";
my $insstart = "TLSins";
my $insend = "TLEins";

## LaTeX macros used to mark deletions/insertions in the output
my $texdelstart = "\\protect\\TLSdel\{";
my $texdelend = "\}";
my $texinsstart = "\\protect\\TLSins\{";
my $texinsend = "\}";

## parse command line
# old file, new file, optional output file (see the usage line in the header)
my ($in1,$in2,$out) = @ARGV;

## create temp files from input files with all comments removed and
## all braces numbered
# The temp files are named tmp1.<pid> / tmp2.<pid> and are created in the
# current working directory. The return value of write_temp() is discarded
# here, so a failure to open a temp file is not reported.
write_temp($w,$in1,'tmp1.'.$$);
write_temp($w,$in2,'tmp2.'.$$);

# write_temp($w, $file, $tmp)
#
# Reads $file, appends a $w-digit nesting-level number to braces, warns on
# STDERR about unbalanced braces, strips TeX comments and writes the result
# to $tmp.
#
#   $w    - number of digits used for the brace numbers
#   $file - path of the input file (read through the shell, see below)
#   $tmp  - path of the temp file to write
#
# Returns the string "Can't open $tmp: ..." if $tmp cannot be opened for
# writing; otherwise the value of the final close().
# Side effects: overwrites the global $_ and the package variable $::cnt.
sub write_temp {
  my ($w,$file,$tmp) = @_;
  # NOTE(review): $file is interpolated into a shell command unquoted, so
  # file names containing spaces or shell metacharacters will misbehave.
  $_ = `cat $file`;

  ## mark pairs of braces with the same $w digits.
  # i. e. convert "{ { {} } {}}" to "{01 {02 {03}03 }02 {02}02}01" ($w=2).
  my $max=10**$w-1;
  $::cnt = 0;
  # The replacement text is $^R, i.e. the value of the last (?{ ... }) code
  # block that ran during the match: the brace plus the zero-padded counter.
  # NOTE(review): the head of this pattern is damaged in this copy of the
  # file. The text between "(?" and "= $max)" is missing and the parentheses
  # no longer balance. Presumably it held a look-behind, the opening of the
  # alternation, the "\{" branch and a guard comparing $::cnt with $max --
  # restore it from a pristine copy before running the script.
  s/
    (?= $max);
          sprintf("\{%0${w}d",++$::cnt) })
    |
      \}
      (?{ sprintf("\}%0${w}d",$::cnt--) })
    )
  /$^R/gx;
  # $::cnt ends at zero only when opening and closing braces balance.
  print STDERR "Warning: ".abs($::cnt)." unmatched ".($::cnt > 0 ? "opening" : "closing")
    ." brace".(abs($::cnt) == 1 ? "" : "s")."\n" if ($::cnt != 0);

  ## remove comments
  # also remove trailing newline and all whitespace at the
  # beginning of the following line like TeX does it
  # This runs after the brace numbering, so braces inside comments have
  # already been counted. The pattern does not check for a preceding
  # backslash, so an escaped "\%" is treated as the start of a comment too.
  s/%.*?\n[ \t]*//g;

  ## output
  # On failure the message is returned to the caller, not printed.
  open(O,">$tmp") or return("Can't open $tmp: $!\n");
  print O;
  close O;
}

# Run wdiff on the two temp files with the custom markers; its complete
# output is captured into $_ and rewritten in place by the steps below.
my $wdiff_cmd = ( "wdiff"
  . " --start-delete=\'$delstart\' --end-delete=\'$delend\'"
  . " --start-insert=\'$insstart\' --end-insert=\'$insend\'"
  . " tmp1.$$ tmp2.$$");
$_ = `$wdiff_cmd`;

###
### Phase I: generic processing
###

## remove paragraph boundaries (two newlines) that wdiff falsely inserted
# FIXME: Does wdiff always insert same spacing before deletion and the
# following insertion?!
# $2 captures the blank-line spacing in front of a deletion; when the same
# spacing (\2) sits between that deletion and the following insertion, it
# is replaced by a single newline.
s/(
    ([ \t]*\n[ \t]*\n[ \t]*)
    $delstart
    (?!$delend)
    (?: . (?!$delend) )*
    .
    $delend
  )
  \2
  $insstart
 /$1\n$insstart/gsx;

## Restore backslashed Spaces (prevents falsely escaped right braces)
# Escaped spaces '\ ' at the end of an insertion/deletion will result in the
# backslash before the end marker and the space after it. Therefore swap
# whitespace and end marker. (This might falsely make the insertion/deletion
# span the following paragraph boundary, but this will be fixed by the next
# step below). When the end markers finally get replaced by $tex(ins|del)end,
# which typically is a '}', without this swapping, this would result in escaped
# right braces and LaTeX complaining about a missing right brace.
# NOTE(review): from here down to the loop header fragment this copy of the
# file is damaged. The substitution described above, every step that
# followed it and the start of the loop over $num are missing; only the
# fragments below survive. They are kept verbatim and do not compile --
# restore this part from a pristine copy.
s/(?
 (?:
   ## grab all paired braces (prevent backtracking):
   (?: . (?! (?
 (?:
   (?: . (?! (?$#stdcommands ? $#mycommands : $#stdcommands)) {
  # Body of a loop over $num (the loop header is only partly preserved above;
  # presumably $num runs up to the larger of the two last indices -- confirm).
  # Build one alternation from the homebrew and standard commands registered
  # under index $num; skip the index when neither table has an entry.
  my $command = "";
  $command .= $mycommands[$num] if ($mycommands[$num] );
  $command .= "|" if ($mycommands[$num] && $stdcommands[$num]);
  $command .= $stdcommands[$num] if ( $stdcommands[$num]);
  next if(!$command);
  # NOTE(review): the tail of this pattern (parameter matching, replacement
  # part and flags), the end of the loop, any later processing steps and the
  # opening of the output "if" are missing in this copy; the text jumps
  # straight to '$out")) {'.
  s/
    (                   # $1: anything before the command
      $delstart
      (?!$delend)
      (?: . (?!$delend) )*?
      .?
    )
    (                   # $2: the command with all its parameters
      \\(?:$command)    # the command itself (preceded by a '\')
      (?:
        \[ .*? (?$out")) {
  # Result ($_) goes to the filehandle O ...
  print O;
  close O;
} else {
  # ... or to STDOUT otherwise (the condition itself is lost, see above).
  print;
}

## remove temp files
unlink('tmp1.'.$$);
unlink('tmp2.'.$$);