# Diff/Patch/XDiff/XDelta für Java



## 0xdeadbeef (29. Sep 2005)

Ich suche eine (freie) Diff/Patch-API, mit der man zumindest aus zwei Versionen von Textdateien eine Differenzdatei und später wieder aus Differenzdatei und Originaldatei die veränderte Datei patchen kann. Optimalerweise wäre eine API, die bereits Binärdateien unterstützt. 
Leider sind die meisten solche Projekte tot (XDelta für Java) oder sehr Beta (JSRC).
Kennt jemand eine stabile, kleine API, die keine allzu restriktive OS-Lizenz hat?


----------



## 0xdeadbeef (2. Okt 2005)

Um das abzuschließen: zwar ist es mir gelungen, aus dem Jakarta JRCS-Wust eine lauffähige textbasierte Diff/Patch zusammenzuschrauben, aber so richtig meinen Bedürfnissen entsprochen hat diese Lösung dann doch nicht.

Habe mir deshalb selber ein binäres Diff/Patch gebastelt. Zwar ein extrem simpler Algorithmus un in jeder Hinsicht häßlich implementiert, aber für meine Zwecke wesentlich besser geeignet.

[EDIT]
Hm, das Abhaken des Threads geht irgendwie nicht. Hab zwar den Button, aber keine Änderung wenn ich ihn drücke.
Wie auch immer: Der Thread kann als abgehakt betrachtet werden.


----------



## mic_checker (12. Okt 2005)

Vielleicht könntest du ja mal deinen Algorithmus posten (?)


----------



## 0xdeadbeef (16. Okt 2005)

Hm, naja, wie gesagt: ist eigentlich nur für meine speziellen Zwecke gedacht und nicht wirklich clever.
Aber bitte:


```
import java.util.ArrayList;
import java.util.List;
import java.util.zip.Adler32;

/**
 * @author 0xdeadbeef
 * Simple diff/patch algorithm for text or binary files
 * In contrast to common line based diff utilities, this algorithm
 * works byte based to create small binary difference files.
 * However this very simple approach is only snesible for small files.
 * It it by no way meant as rival to full featured approaches like XDelta. 
 */
public class Diff {
	
	/**
	 * Set diff parameters
	 * @param winLen Length of windows to sarch for resynchronisation
	 * @param resyncLen Number of equal bytes needed for resynchronisation
	 */
	public static void setParameters(int winLen, int resyncLen) {
		resyncLength = resyncLen;
		windowLength = winLen;
	}

	/**
	 * Create diff buffer from the differences between source and target buffer
	 * @param bsrc source buffer
	 * @param btrg target buffer
	 * @return buffer of differences
	 */
	public static byte[] diffBuffers(byte bsrc[], byte btrg[]) {
		ArrayList patch = new ArrayList(); // <Byte>		
		Buffer src = new Buffer(bsrc);
		Buffer trg = new Buffer(btrg);
		
		// write header
		setDWord(patch,HEADER_ID);
		// write lengths to patch list
		setLen(patch,src.length());
		setLen(patch,trg.length());
		// write crcs to patch list
		Adler32 crc = new Adler32();
		crc.update(src.getData());
		setDWord(patch,(int)crc.getValue());
		crc.reset();
		crc.update(trg.getData());
		setDWord(patch,(int)crc.getValue());
		setDWord(patch,DATA_ID);
		
		// examine source buffer
		int ofs = 0;
		while (src.getIndex() < src.length()) {
			// search for difference
			int s = src.getByte();
			int t = trg.getByte();
			if (s == t) {
				ofs++;
				continue;
			}
			// reset indeces
			src.setIndex(src.getIndex()-1);
			trg.setIndex(trg.getIndex()-1);
			// write offset
			setLen(patch,ofs);
			System.out.println("Offset: "+ofs);
			ofs = 0;
			// check for insert, delete, replace
			int len, leni, lend, lenr, lens[];
			int state = -1;
			
			leni = checkInsert(src,trg);
			lend = checkDelete(src,trg);
			lenr = checkReplace(src,trg);
			lens = checkSubstitute(src,trg);
			len = Math.min(leni,lend);
			len = Math.min(len,lenr);
			len = Math.min(len,lens[1]);
			if (len > windowLength) {
				// completely lost synchronisation
				int rs = src.length() - src.getIndex();
				int rt = trg.length() - trg.getIndex(); 
				if (rs==rt) {
					len = rs;
					state = REPLACE;
				} else {
					len = rt;
					state = INSERT;
				}
				break;				
			}
			if (len == leni) 
				state = INSERT;
			else if (len == lend) 
				state = DELETE;
			else if (len == lenr) 
				state = REPLACE;
			else if (len == lens[1])
				state = SUBSTITUTE;			
			
			switch (state) {
				case INSERT : 
					// insert
					System.out.println("Insert: "+len);
					patch.add(new Byte(INSERT));
					setLen(patch,len);
					for (int i = 0; i<len; i++)  
						patch.add(new Byte((byte)trg.getByte()));					
					break; 
				case DELETE: 
					// delete
					System.out.println("Delete: "+len);
					patch.add(new Byte(DELETE));
					setLen(patch,len);
					src.setIndex(src.getIndex()+len);
					break;
				case REPLACE:
					// replace
					System.out.println("Replace: "+len);
					patch.add(new Byte(REPLACE));
					setLen(patch,len);
					for (int i = 0; i<len; i++)  
						patch.add(new Byte((byte)trg.getByte()));
					src.setIndex(src.getIndex()+len);
					break;
				case SUBSTITUTE:
					// replace
					System.out.println("Substitute: "+lens[0]+"/"+lens[1]);
					patch.add(new Byte(SUBSTITUTE));
					setLen(patch,lens[0]);
					setLen(patch,lens[1]);
					for (int i = 0; i<lens[1]; i++)  
						patch.add(new Byte((byte)trg.getByte()));
					src.setIndex(src.getIndex()+lens[0]);
					break;
			}		
		}
		
		// if the files end identically, the offset needs to be written
		if (ofs != 0) {
			System.out.println("Offset: "+ofs);
			setLen(patch,ofs);
		}
		
		// check for stuff to insert in target
		if (trg.getIndex() < trg.length()) {			
			patch.add(new Byte(INSERT));
			int len = trg.length() - trg.getIndex();
			System.out.println("Insert (End): "+len);
			setLen(patch,len);
			for (int i = 0; i<len; i++)  
				patch.add(new Byte((byte)trg.getByte()));	
		}
		
		if (patch.size() == 0)
			return null;
		
		System.out.println("Patch length: "+patch.size());
		
		// convert patch list to output byte array
		byte retVal[] = new byte[patch.size()];
		for (int i =0; i<retVal.length;i++)
			retVal[i] = ((Byte)patch.get(i)).byteValue();
		return retVal;
	}
	
	/**
	 * Create a target buffer from a source buffer and a buffer of differences
	 * @param bsrc source buffer
	 * @param bpatch buffer containing differences
	 * @return target buffer created from a source buffer and a buffer of differences
	 * @throws DiffException
	 */
	public static byte[] patchbuffers(byte bsrc[], byte bpatch[]) throws DiffException {
		Buffer src = new Buffer(bsrc);
		Buffer patch = new Buffer(bpatch);
		// calculate src crc
		Adler32 crc = new Adler32();
		crc.update(src.getData());
		// analyze header
		if (patch.getDWord() != Diff.HEADER_ID)
			throw new DiffException("No header id found in patch");
		int lenSrc = getLen(patch);
		if (lenSrc != src.length())
			throw new DiffException("Size of source differs from that in patch header");
		int lenTrg = getLen(patch);
		if (patch.getDWord() != (int)crc.getValue())
			throw new DiffException("CRC of source differs from that in patch header");
		int crcTrg = patch.getDWord();
		if (patch.getDWord() != Diff.DATA_ID)
			throw new DiffException("No data id found in patch header");
				
		Buffer trg = new Buffer(lenTrg);
		
		// step through patch buffer 
		try {
			while (patch.getIndex()<patch.length()) {
				int ofs = getLen(patch);
				System.out.println("Offset: "+ofs);
				// copy bytes from source buffer
				for (int i=0; i<ofs; i++)
					trg.setByte((byte)src.getByte());
				// check for patch buffer empty
				if (patch.getIndex()==patch.length())
					break;
				// now there must follow a command followed by a
				int cmdIdx = patch.getIndex(); // just for exception
				int cmd = patch.getByte();
				int len = getLen(patch);
				switch (cmd) {
					case Diff.DELETE:
						System.out.println("Delete: "+len);
						src.setIndex(src.getIndex()+len);
						break;
					case Diff.REPLACE:
						System.out.print("Replace/");
						src.setIndex(src.getIndex()+len);
						// fall through
					case Diff.INSERT:
						System.out.println("Insert: "+len);
						for (int r=0; r<len;r++) 
							trg.setByte((byte)patch.getByte());
						break;
					case Diff.SUBSTITUTE: {
						int lenT = getLen(patch);
						System.out.println("Substitute: "+len+"/"+lenT);
						src.setIndex(src.getIndex()+len);
						for (int r=0; r<lenT;r++) 
							trg.setByte((byte)patch.getByte());
						break; }
					default:
						throw new DiffException("Unknown command "+cmd+" at patch offset "+cmdIdx);
				}
			}
		} catch (ArrayIndexOutOfBoundsException ex) {
			throw new DiffException("Array index exceeds bounds. Patch file corrupt...");
		}
		
		// check length
		if (trg.getIndex() != lenTrg)
			throw new DiffException("Size of target differs from that in patch header");
		
		// compare crc
		crc.reset();
		crc.update(trg.getData());
		if (crcTrg != (int)crc.getValue())
			throw new DiffException("CRC of target differs from that in patch");
		
		return trg.getData();
	}

	/**
	 * Lengths/Offset are stored as 7bit values. The 8th bit is used as marker if the number
	 * is continued in the next byte.
	 * @param b Buffer from which to read the length/offset
	 * @return integer value of length/offset
	 * @throws ArrayIndexOutOfBoundsException
	 */
	private static int getLen(Buffer b) throws ArrayIndexOutOfBoundsException {
		int val = 0;
		int v;
		int shift = 0;
		do {
			v = b.getByte();
			if ((v & 0x80) == 0) {
				// no continue bit set
				val += (v<<shift);
				break;
			}
			// erase contine marker bit
			v &= 0x7f;
			val += (v<<shift);
			shift += 7;
		} while (true); 
		return val;		
	}	
	
	/**
	 * Store length/offset information in 7bit encoding. A set 8th bit means: continued in next byte
	 * So 127 is stored as 0x7f, but 128 is stored as 0x80 0x01 (where 0x80 means 0, highest bit is marker)
	 * @param l Patch list to add length/offset in 7bit encoding 
	 * @param val Value to add in 7bit encoding
	 */
	private static void setLen(List l, int val) {
		while ( val > 0x7f) {
			l.add(new Byte((byte)(val & 0x7f | 0x80)));
			val >>>= 7;
		}
		l.add(new Byte((byte)val));
	}	
	
	/**
	 * Check for "insert" difference
	 * @param src source buffer
	 * @param trg target buffer
	 * @return number of bytes inserted
	 * @throws ArrayIndexOutOfBoundsException
	 */
	private static int checkInsert(Buffer src,Buffer trg) throws ArrayIndexOutOfBoundsException {
		byte[] bs = src.getData();
		int is = src.getIndex();
		byte[] bt = trg.getData();
		int it = trg.getIndex();
		int len = windowLength;
		if (is+len+resyncLength >= bs.length)
			len = bs.length - is - resyncLength;
		if (it+len+resyncLength >= bt.length)
			len = bt.length - it - resyncLength;
		for (int w=1; w<len; w++) {
			int r;
			for (r = 0; r<resyncLength; r++) 
				if (bs[is+r] != bt[it+w+r]) 
					break;			
			if (r == resyncLength)
				return w;
		}
		return Integer.MAX_VALUE;
	}

	/**
	 * Check for "delete" difference
	 * @param src source buffer
	 * @param trg target buffer
	 * @return number of bytes deleted
	 * @throws ArrayIndexOutOfBoundsException
	 */
	private static int checkDelete(Buffer src,Buffer trg) throws ArrayIndexOutOfBoundsException {
		byte[] bs = src.getData();
		int is = src.getIndex();
		byte[] bt = trg.getData();
		int it = trg.getIndex();
		int len = windowLength;
		if (is+len+resyncLength >= bs.length)
			len = bs.length - is - resyncLength;
		if (it+len+resyncLength >= bt.length)
			len = bt.length - it - resyncLength;
		for (int w=1; w<len; w++) {
			int r;
			for (r = 0; r<resyncLength; r++) 
				if (bs[is+w+r] != bt[it+r]) 
					break;			
			if (r == resyncLength)
				return w;
		}
		return Integer.MAX_VALUE;
	}

	/**
	 * Check for "replace" difference
	 * @param src source buffer
	 * @param trg target buffer
	 * @return number of bytes replaced
	 * @throws ArrayIndexOutOfBoundsException
	 */
	private static int checkReplace(Buffer src,Buffer trg) throws ArrayIndexOutOfBoundsException {
		byte[] bs = src.getData();
		int is = src.getIndex();
		byte[] bt = trg.getData();
		int it = trg.getIndex();
		int len = windowLength;
		if (is+len+resyncLength >= bs.length)
			len = bs.length - is - resyncLength;
		if (it+len+resyncLength >= bt.length)
			len = bt.length - it - resyncLength;
		for (int w=1; w<len; w++) {
			int r;
			for (r = 0; r<resyncLength; r++) 
				if (bs[is+w+r] != bt[it+w+r]) 
					break;			
			if (r == resyncLength)
				return w;
		}
		return Integer.MAX_VALUE;
	}
	
	/**
	 * Check for "substitute" difference
	 * @param src source buffer
	 * @param trg target buffer
	 * @return integer array: [0]: number of bytes to delete in source, [1]: number of bytes to insert in target
	 * @throws ArrayIndexOutOfBoundsException

	 */
	private static int[] checkSubstitute(Buffer src,Buffer trg) throws ArrayIndexOutOfBoundsException {
		byte[] bs = src.getData();
		int is = src.getIndex();
		byte[] bt = trg.getData();
		int it = trg.getIndex();
		int len = windowLength;
		if (is+len+resyncLength >= bs.length)
			len = bs.length - is - resyncLength;
		if (it+len+resyncLength >= bt.length)
			len = bt.length - it - resyncLength;
		
		ArrayList solutions = new ArrayList(); // <int[]>
		
		for (int ws=1; ws<len; ws++) {
			for (int wt=1; wt<len; wt++) {
				int r;
				for (r = 0; r<resyncLength; r++) 
					if (bs[is+ws+r] != bt[it+wt+r]) 
						break;			
				if (r == resyncLength) {
					int retVal[] = new int[2];
					retVal[0] = ws;
					retVal[1] = wt;
					solutions.add(retVal);
				}
			}
		}

		if (solutions.size() == 0) {
			// nothing found
			int retVal[] = new int[2];
			retVal[0] = Integer.MAX_VALUE;
			retVal[1] = Integer.MAX_VALUE;
			return retVal;
		}
		
		// search best solution
		int sMinIdx = 0;
		for (int i=1; i<solutions.size(); i++) {
			int s[] = (int[])solutions.get(i);
			int sMin[] = (int[])solutions.get(sMinIdx);
			if (s[0]+s[1] < sMin[0]+sMin[1])
				sMinIdx = i;
		}
		return (int[])solutions.get(sMinIdx);
	}
	
	/**
	 * Write DWord to difference list
	 * @param l difference list
	 * @param val DWord value
	 */
	private static void setDWord(List l, int val) {
		l.add(new Byte((byte)val));
		l.add(new Byte((byte)(val>>8)));
		l.add(new Byte((byte)(val>>16)));
		l.add(new Byte((byte)(val>>24)));
	}
	
	private static int resyncLength = 4;   // default resync length
	private static int windowLength = 512; // default resynchronisation window length
	
	private final static byte INSERT = 0;
	private final static byte DELETE = 1;
	private final static byte REPLACE = 2;
	private final static byte SUBSTITUTE = 3;
	
	private final static int HEADER_ID = 0xdeadbeef;
	private final static int DATA_ID = 0xfade0ff;
}



/**
 * @author 0xdeadbeef
 * Buffer class that manages reading/writing from/to a byte buffer
 *
 */
class Buffer {
	private byte buffer[];
	private int  index;
	
	Buffer(int size) {
		index = 0;
		buffer = new byte[size];
	}
	
	Buffer(byte b[]) {
		index = 0;
		buffer = b;
	}
	
	int length() {
		return buffer.length;
	}
	
	int getIndex() {
		return index;
	}
	
	byte[] getData() {
		return buffer;
	}

	void setIndex(int idx) {
		index = idx;
	}
	
	int getByte() throws ArrayIndexOutOfBoundsException {
		return buffer[index++] & 0xff;
	}
	
	void setByte(byte val) throws ArrayIndexOutOfBoundsException {
		buffer[index++] = val;
	}

	int getWord() throws ArrayIndexOutOfBoundsException {
		return getByte() | (getByte()<<8);
	}

	void setWord(int val) throws ArrayIndexOutOfBoundsException {
		setByte((byte)val);
		setByte((byte)(val>>8));
	}
	
	int getDWord() throws ArrayIndexOutOfBoundsException {
		return getByte() | (getByte()<<8) | (getByte()<<16) | (getByte()<<24);
	}
	
	void setDWord(int val) throws ArrayIndexOutOfBoundsException {
		setByte((byte)val);
		setByte((byte)(val>>8));
		setByte((byte)(val>>16));
		setByte((byte)(val>>24));
	}
}

class DiffException extends Exception {
    final static long serialVersionUID = 0x000000001;
    
    public DiffException() {
        super();
    }

    public DiffException(String s) {
        super(s);
    }
}
```


----------

