/*
PatternGenerator
Copyright (C) 2003 Bj\"orn Hoffmeister

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

package de.uni_luebeck.tcs.demo.datamining;

import java.util.*;
import java.util.regex.*;

/**
 * Generates random patterns over a configurable alphabet, example strings
 * in which the parts of a pattern occur in order between random filler
 * letters, and random counter-example candidates.
 *
 * A pattern is an array of parts; each part is a run of letters that must
 * appear contiguously. All results are kept in the instance and read back
 * through the corresponding getters.
 */
public class PatternGenerator {
    // Preset identifiers; not referenced inside this class.
    public static final int DNS_PRESET = 0;
    public static final int LATIN_SPACE_PRESET = 1;


    /** Latin alphabet. */
    protected static final String[] LATIN = {
        "a", "b", "c", "d", "e", "f", "g", "h", "i",
        "j", "k", "l", "m", "n", "o", "p", "q", "r",
        "s", "t", "u", "v", "w", "x", "y", "z"
    };

    /** Latin alphabet plus the space character. */
    public static final String[] LATIN_SPACE = {
        "a", "b", "c", "d", "e", "f", "g", "h", "i",
        "j", "k", "l", "m", "n", "o", "p", "q", "r",
        "s", "t", "u", "v", "w", "x", "y", "z", " "
    };

    /** DNA alphabet: adenine, thymine, guanine and cytosine. */
    public static final String[] DNS = {"A", "T", "G", "C"};

    /** Alphabet the letters are drawn from. */
    protected String[] strA;
    /** Cumulative distribution over {@link #strA}; entry i belongs to letter i. */
    protected double[] dblCDF;

    /** Probability that a pattern letter is glued to its predecessor (same part). */
    public double dblPStuck = 0.5;

    // Pattern length range; when blnPatternMaxL is true the length is always intPatternMaxL.
    protected int intPatternMinL = 6;
    protected int intPatternMaxL = 24;
    protected boolean blnPatternMaxL = false;

    // Example string length range; when blnStringMaxL is true the length is always intStringMaxL.
    protected int intStringMinL = 24;
    protected int intStringMaxL = 32;
    protected boolean blnStringMaxL = true;

    // Number of examples range; when blnMaxExamples is true the count is always intMaxExamples.
    protected int intMinExamples = 5;
    protected int intMaxExamples = 10;
    protected boolean blnMaxExamples = false;

    protected final Random rand;

    protected String[] strPattern;
    protected String[] strExample;
    protected String[] strStrictExample;
    protected int[][] intResultIndex;
    protected int[][] intStrictResultIndex;

    /** Creates a generator over the {@link #DNS} alphabet with uniform letter probabilities. */
    public PatternGenerator() {
        this(DNS);
    }

    /**
     * Creates a generator over the given alphabet with uniform letter
     * probabilities. The random source is seeded with the current time.
     *
     * @param strA alphabet to draw letters from
     */
    public PatternGenerator(String[] strA) {
        this.strA = strA;
        this.dblCDF = generateUniformCDF(strA.length);
        this.rand = new Random(System.currentTimeMillis());
    }

    /**
     * Creates a generator over the given alphabet with the given cumulative
     * letter distribution. The random source is seeded with the current time.
     *
     * @param strA   alphabet to draw letters from
     * @param dblCDF cumulative distribution, indexed like {@code strA}
     */
    public PatternGenerator(String[] strA, double[] dblCDF) {
        this.strA = strA;
        this.dblCDF = dblCDF;
        this.rand = new Random(System.currentTimeMillis());
    }

    /** Fixes the number of examples generated per call. */
    public void setNumberOfExamples(int intExamples) {
        this.intMinExamples = intExamples;
        this.intMaxExamples = intExamples;
        this.blnMaxExamples = true;
    }

    /**
     * Sets the range the number of examples is drawn from. A maximum below
     * the minimum is raised to the minimum.
     */
    public void setNumberOfExamples(int intMinExamples, int intMaxExamples) {
        this.intMinExamples = intMinExamples;
        this.intMaxExamples = StrictMath.max(intMinExamples, intMaxExamples);
        this.blnMaxExamples = (this.intMinExamples == this.intMaxExamples);
    }

    /** Fixes the length of generated example strings. */
    public void setStringLength(int intStringL) {
        this.intStringMinL = intStringL;
        this.intStringMaxL = intStringL;
        this.blnStringMaxL = true;
    }

    /**
     * Sets the range the example string length is drawn from. A maximum
     * below the minimum is raised to the minimum.
     */
    public void setStringLength(int intStringMinL, int intStringMaxL) {
        this.intStringMinL = intStringMinL;
        this.intStringMaxL = StrictMath.max(intStringMinL, intStringMaxL);
        this.blnStringMaxL = (this.intStringMinL == this.intStringMaxL);
    }

    /**
     * Fixes the pattern length. The value is capped at three quarters of the
     * minimum string length in effect when this method is called.
     */
    public void setPatternLength(int intPatternL) {
        this.intPatternMinL = StrictMath.min((3 * intStringMinL) / 4, intPatternL);
        this.intPatternMaxL = intPatternMinL;
        this.blnPatternMaxL = true;
    }

    /**
     * Sets the range the pattern length is drawn from. Both bounds are capped
     * at three quarters of the minimum string length in effect when this
     * method is called; a maximum below the minimum is raised to the minimum.
     */
    public void setPatternLength(int intPatternMinL, int intPatternMaxL) {
        final int intCap = (3 * intStringMinL) / 4;
        this.intPatternMinL = StrictMath.min(intCap, intPatternMinL);
        this.intPatternMaxL = StrictMath.min(intCap, StrictMath.max(intPatternMinL, intPatternMaxL));
        this.blnPatternMaxL = (this.intPatternMinL == this.intPatternMaxL);
    }

    /**
     * Builds the cumulative distribution of a uniform distribution.
     *
     * @param intSize number of letters
     * @return array whose entry i is the running sum of (i + 1) steps of 1 / intSize
     */
    protected double[] generateUniformCDF(int intSize) {
        double[] dblCDF = new double[intSize];
        double dblInc = 1.0 / (double)intSize;

        dblCDF[0] = dblInc;
        for (int i = 1; i < intSize; i++) dblCDF[i] = dblCDF[i - 1] + dblInc;

        return dblCDF;
    }

    /**
     * Draws one letter of the alphabet according to {@link #dblCDF}.
     *
     * @return the drawn letter
     */
    protected String getLetter() {
        double dblP = rand.nextDouble();
        int i;
        // The last letter is the fallback, so the final CDF entry is never
        // compared and accumulated rounding error in it cannot cause a miss.
        for (i = 0; i < dblCDF.length - 1; i++) {
            if (dblCDF[i] > dblP) break;
        }
        return strA[i];
    }

    /**
     * Returns the numbers 0 .. intN - 1 in random order.
     *
     * @param intN number of elements
     * @return a new array holding a random permutation of 0 .. intN - 1
     */
    public int[] generateRandomOrder(int intN) {
        int[] intOrder = new int[intN];
        for (int i = 0; i < intN; i++) intOrder[i] = i;

        return generateRandomOrder(intOrder);
    }

    /**
     * Returns the elements of the given array in random order. The argument
     * itself is left untouched.
     *
     * @param intOrder elements to permute
     * @return a new array holding the same elements in random order
     */
    public int[] generateRandomOrder(int[] intOrder) {
        final int intN = intOrder.length;
        // Draw from a private copy: picked slots are overwritten with the
        // last undrawn element, which must not leak into the caller's array.
        int[] intPool = new int[intN];
        System.arraycopy(intOrder, 0, intPool, 0, intN);

        int intRemaining = intN;
        int[] intRandom = new int[intN];
        for (int i = 0; i < intN; i++) {
            int intPick = rand.nextInt(intRemaining);
            intRandom[i] = intPool[intPick];
            intPool[intPick] = intPool[--intRemaining];
        }
        return intRandom;
    }

    /**
     * Returns an array with intTrue entries set to true and intFalse entries
     * set to false, in random order.
     */
    public boolean[] mix(int intTrue, int intFalse) {
        boolean[] blnMixed = new boolean[intTrue + intFalse];
        int[] intRandom = generateRandomOrder(blnMixed.length);
        // Only the true slots need to be written; new arrays start out false.
        for (int i = 0; i < intTrue; i++) blnMixed[intRandom[i]] = true;

        return blnMixed;
    }

    /**
     * Generates a new random pattern and stores it for {@link #getPattern()}.
     * Each letter after the first is appended to the current part with
     * probability {@link #dblPStuck}; otherwise it starts a new part. A
     * pattern length below 1 yields an empty pattern.
     */
    public void generatePattern() {
        int intPatternL = (blnPatternMaxL)?
            intPatternMaxL:rand.nextInt(intPatternMaxL - intPatternMinL + 1) + intPatternMinL;

        if (intPatternL < 1) {
            this.strPattern = new String[0];
            return;
        }

        // blnStuck[i] tells whether letter i + 1 joins the part of letter i.
        boolean[] blnStuck = new boolean[intPatternL - 1];
        int intParts = intPatternL;
        for (int i = 0; i < intPatternL - 1; i++) {
            if (rand.nextDouble() < dblPStuck) {
                blnStuck[i] = true;
                intParts--;
            }
        }

        this.strPattern = new String[intParts];
        strPattern[0] = getLetter();
        int intPart = 0;
        for (int i = 0; i < intPatternL - 1; i++) {
            if (blnStuck[i]) strPattern[intPart] += getLetter();
            else strPattern[++intPart] = getLetter();
        }
    }

    /** Returns the pattern produced by the last {@link #generatePattern()} call. */
    public String[] getPattern() {
        return strPattern;
    }

    /** Generates examples for the stored pattern; see {@link #generateExample(String[], int)}. */
    public void generateExample() {
        generateExample(getPattern());
    }

    /**
     * Generates examples for the given pattern; the number of examples is
     * taken from the configured range.
     */
    public void generateExample(String[] strPattern) {
        generateExample(strPattern, drawExampleCount());
    }

    /**
     * Generates example strings that contain the parts of the pattern in
     * order, interleaved with random filler letters, and records where each
     * pattern letter ended up. Results are available through
     * {@link #getExample()} and {@link #getIndexTable()}.
     *
     * @param strPattern  parts of the pattern
     * @param intExamples number of examples to generate
     */
    public void generateExample(String[] strPattern, int intExamples) {
        final int intPatternL = patternLength(strPattern);

        StringBuffer[] stbExample = new StringBuffer[intExamples];
        intResultIndex = new int[intPatternL + 1][intExamples];
        for (int i = 0; i < intExamples; i++) {
            int intStringL = drawFillerLength(intPatternL);
            stbExample[i] = new StringBuffer(intStringL + intPatternL);

            // true slots take the next pattern part, false slots one filler letter
            boolean[] blnMix = mix(strPattern.length, intStringL);
            int intPart = 0;
            for (int j = 0; j < blnMix.length; j++) {
                if (blnMix[j]) stbExample[i].append(strPattern[intPart++]);
                else stbExample[i].append(getLetter());
            }

            int[] intIndex = locatePattern(blnMix, strPattern, intPatternL);
            for (int n = 0; n <= intPatternL; n++) intResultIndex[n][i] = intIndex[n];
        }

        this.strExample = new String[intExamples];
        for (int i = 0; i < intExamples; i++) strExample[i] = stbExample[i].toString();
    }

    /** Returns the examples produced by the last {@code generateExample} call. */
    public String[] getExample() {
        return strExample;
    }

    /**
     * Returns the index table of the last {@code generateExample} call.
     * Entry [n][i] is the position of pattern letter n in example i; the
     * additional last row holds {@link Integer#MAX_VALUE} as an end marker.
     */
    public int[][] getIndexTable() {
        return intResultIndex;
    }

    /** Generates strict examples for the stored pattern; see {@link #generateStrictExample(String[], int)}. */
    public void generateStrictExample() {
        generateStrictExample(getPattern());
    }

    /**
     * Generates strict examples for the given pattern; the number of examples
     * is taken from the configured range.
     */
    public void generateStrictExample(String[] strPattern) {
        generateStrictExample(strPattern, drawExampleCount());
    }

    /**
     * Generates a set of examples that contains two "strict" examples whose
     * filler letters come from two disjoint letter sets, placed at random
     * positions among ordinary examples. At least the two strict examples
     * are always produced. Results are available through
     * {@link #getStrictExample()} and {@link #getStrictIndexTable()}.
     *
     * When more than two examples are requested the ordinary ones are
     * produced by {@link #generateExample(String[], int)}, which replaces the
     * values returned by {@link #getExample()} and {@link #getIndexTable()}.
     *
     * @param strPattern  parts of the pattern
     * @param intExamples total number of examples requested
     */
    public void generateStrictExample(String[] strPattern, int intExamples) {
        final int intPatternL = patternLength(strPattern);

        int intStringL1 = drawFillerLength(intPatternL);
        int intStringL2 = drawFillerLength(intPatternL);
        StringBuffer stbStrict1 = new StringBuffer(intPatternL + intStringL1);
        StringBuffer stbStrict2 = new StringBuffer(intPatternL + intStringL2);
        // Filler letters owned by the first and by the second strict example.
        HashSet hstA1 = new HashSet(strA.length);
        HashSet hstA2 = new HashSet(strA.length);
        // i: position in the mix, j: filler letters placed, k: next pattern part
        int i1 = 0, i2 = 0, j1 = 0, j2 = 0, k1 = 0, k2 = 0;
        boolean[] blnMix1 = mix(strPattern.length, intStringL1);
        boolean[] blnMix2 = mix(strPattern.length, intStringL2);

        // NOTE(review): this loop has no iteration bound. It cannot finish if
        // the first example still needs filler while every letter it draws
        // already belongs to the second set.
        while ((i1 < blnMix1.length) || (i2 < blnMix2.length)) {
            String strLetter = getLetter();
            // 0: letter not owned yet, 1: owned by first set, 2: owned by second set
            int intLetter = 0;
            if (hstA1.contains(strLetter)) intLetter = 1;
            else if (hstA2.contains(strLetter)) intLetter = 2;

            if (i1 < blnMix1.length) {
                if (blnMix1[i1]) {
                    stbStrict1.append(strPattern[k1]);
                    k1++; i1++;
                } else if (intLetter == 1) {
                    stbStrict1.append(strLetter);
                    j1++; i1++;
                } else if ((intLetter == 0) && ((j1 < j2) || (i2 >= blnMix2.length))) {
                    // The first example claims new letters while it is behind,
                    // or freely once the second example is complete; without
                    // the latter it could wait forever for a letter.
                    hstA1.add(strLetter);
                    intLetter = 1;
                    stbStrict1.append(strLetter);
                    j1++; i1++;
                }
            }
            if (i2 < blnMix2.length) {
                if (blnMix2[i2]) {
                    stbStrict2.append(strPattern[k2]);
                    k2++; i2++;
                } else if (intLetter == 2) {
                    stbStrict2.append(strLetter);
                    j2++; i2++;
                } else if (intLetter == 0) {
                    hstA2.add(strLetter);
                    intLetter = 2;
                    stbStrict2.append(strLetter);
                    j2++; i2++;
                }
            }
        }

        int[] intResultIndex1 = locatePattern(blnMix1, strPattern, intPatternL);
        int[] intResultIndex2 = locatePattern(blnMix2, strPattern, intPatternL);

        String[] strUnstrict;
        int[][] intUnstrict;
        if (intExamples > 2) {
            generateExample(strPattern, intExamples - 2);
            strUnstrict = getExample();
            intUnstrict = getIndexTable();
            // Pick two distinct slots out of all strUnstrict.length + 2 result
            // slots: i2 anywhere, i1 among the remaining ones.
            i1 = rand.nextInt(strUnstrict.length + 1);
            i2 = rand.nextInt(strUnstrict.length + 2);
            if (i2 <= i1) i1++;
        } else {
            strUnstrict = new String[0];
            intUnstrict = new int[0][0];
            i1 = 0;
            i2 = 1;
        }

        strStrictExample = new String[strUnstrict.length + 2];
        intStrictResultIndex = new int[intPatternL + 1][strUnstrict.length + 2];
        int intNext = 0;
        for (int i = 0; i < strStrictExample.length; i++) {
            if (i == i1) {
                strStrictExample[i] = stbStrict1.toString();
                for (int j = 0; j <= intPatternL; j++) intStrictResultIndex[j][i] = intResultIndex1[j];
            } else if (i == i2) {
                strStrictExample[i] = stbStrict2.toString();
                for (int j = 0; j <= intPatternL; j++) intStrictResultIndex[j][i] = intResultIndex2[j];
            } else {
                strStrictExample[i] = strUnstrict[intNext];
                for (int j = 0; j <= intPatternL; j++) intStrictResultIndex[j][i] = intUnstrict[j][intNext];
                intNext++;
            }
        }
    }

    /** Returns the examples produced by the last {@code generateStrictExample} call. */
    public String[] getStrictExample() {
        return strStrictExample;
    }

    /**
     * Returns the index table of the last {@code generateStrictExample} call,
     * laid out like the table of {@link #getIndexTable()}.
     */
    public int[][] getStrictIndexTable() {
        return intStrictResultIndex;
    }

    /**
     * Generates random strings intended not to match the given regular
     * expression. Each string is redrawn up to 1000 times until its
     * lower-cased form no longer matches. If it still matches, it is rebuilt
     * from letters that differ from the first letter of a randomly chosen
     * pattern part; that final string is not checked against the expression.
     *
     * @param strPattern  parts of the pattern, used for the fallback only
     * @param patPattern  expression the strings should not match
     * @param intExamples number of strings to generate
     * @return the generated strings
     */
    public String[] generateCounterExample(String[] strPattern, Pattern patPattern, int intExamples) {
        StringBuffer[] stbExample = new StringBuffer[intExamples];
        for (int i = 0; i < intExamples; i++) {
            int intStringL = (blnStringMaxL)?intStringMaxL:
                rand.nextInt(intStringMaxL - intStringMinL + 1) + intStringMinL;
            stbExample[i] = new StringBuffer(intStringL);
            boolean blnMatch = true;
            for (int k = 0; (k < 1000) && blnMatch; k++) {
                // replace() with start == length() appends, so the first pass
                // fills the buffer and later passes overwrite it in place.
                for (int j = 0; j < intStringL; j++) stbExample[i].replace(j, j + 1, getLetter());
                blnMatch = patPattern.matcher(stbExample[i].toString().toLowerCase()).matches();
            }
            if (blnMatch) {
                String strForbidden = strPattern[rand.nextInt(strPattern.length)].substring(0, 1);
                String strLetter;
                for (int j = 0; j < intStringL; j++) {
                    do { strLetter = getLetter(); } while (strForbidden.equals(strLetter));
                    stbExample[i].replace(j, j + 1, strLetter);
                }
            }
        }

        String[] strExample = new String[intExamples];
        for (int i = 0; i < intExamples; i++) strExample[i] = stbExample[i].toString();
        return strExample;
    }

    /** Draws the number of examples from the configured range. */
    private int drawExampleCount() {
        return (blnMaxExamples)?
            intMaxExamples:rand.nextInt(intMaxExamples - intMinExamples + 1) + intMinExamples;
    }

    /**
     * Draws the number of filler letters for one example: the total string
     * length (fixed, or drawn from the configured range) minus the pattern
     * length, but never less than zero.
     */
    private int drawFillerLength(int intPatternL) {
        int intTotalL = (blnStringMaxL)?
            intStringMaxL:rand.nextInt(intStringMaxL - intStringMinL + 1) + intStringMinL;
        return StrictMath.max(0, intTotalL - intPatternL);
    }

    /** Returns the summed length of all parts of the pattern. */
    private static int patternLength(String[] strPattern) {
        int intPatternL = 0;
        for (int i = 0; i < strPattern.length; i++) intPatternL += strPattern[i].length();
        return intPatternL;
    }

    /**
     * Computes the position of every pattern letter inside a string built
     * from the given mix, counting one position per filler slot. The extra
     * last entry is set to Integer.MAX_VALUE.
     */
    private static int[] locatePattern(boolean[] blnMix, String[] strPattern, int intPatternL) {
        int[] intIndex = new int[intPatternL + 1];
        int intPart = 0, intLetter = 0, intPos = 0;
        for (int i = 0; i < blnMix.length; i++) {
            if (blnMix[i]) {
                int intPartL = strPattern[intPart].length();
                for (int j = 0; j < intPartL; j++) intIndex[intLetter + j] = intPos++;
                intLetter += intPartL;
                intPart++;
            } else {
                intPos++;
            }
        }
        intIndex[intPatternL] = Integer.MAX_VALUE;
        return intIndex;
    }

    /** Joins the parts with an "x" before, between and after them. */
    private static String frame(String[] strParts) {
        StringBuffer stbFramed = new StringBuffer("x");
        for (int i = 0; i < strParts.length; i++) stbFramed.append(strParts[i]).append("x");
        return stbFramed.toString();
    }

    /**
     * Prints each example on its own line; characters at positions listed in
     * the index table are printed unchanged, all others in lower case.
     */
    private static void printMarked(String[] strExamples, int[][] intIndex) {
        for (int i = 0; i < strExamples.length; i++) {
            int k = 0;
            for (int j = 0; j < strExamples[i].length(); j++) {
                char chrLetter = strExamples[i].charAt(j);
                if (intIndex[k][i] == j) {
                    System.out.print(chrLetter);
                    k++;
                } else {
                    System.out.print(Character.toLowerCase(chrLetter));
                }
            }
            System.out.println();
        }
    }

    /**
     * Demo: generates a pattern with ordinary and strict examples, runs
     * Bonsai on both example sets and prints the patterns, the examples and
     * a few guess strings.
     */
    public static void main(String[] args) {
        PatternGenerator pgnThis = new PatternGenerator();

        pgnThis.generatePattern();
        String[] strPattern = pgnThis.getPattern();
        String strOriginal = frame(strPattern);
        pgnThis.generateExample(strPattern);
        pgnThis.generateStrictExample(strPattern);
        String[] strE = pgnThis.getExample();
        String[] strSE = pgnThis.getStrictExample();

        Bonsai bnsThis = new Bonsai();
        bnsThis.process(strE);
        strPattern = bnsThis.getResultPattern();
        String strUnstrict = frame(strPattern);
        System.out.println("\n*** unstrict ***");
        bnsThis.printResult();
        bnsThis.process(strSE);
        strPattern = bnsThis.getResultPattern();
        String strStrict = frame(strPattern);
        System.out.println("\n*** strict ***");
        bnsThis.printResult();

        System.out.println(
                           "Pattern:" +
                           "\noriginal: " + strOriginal +
                           "\nstrict:   " + strStrict +
                           "\nunstrict: " + strUnstrict
                           );
        System.out.println("Beispiele (unstrict):");
        printMarked(strE, pgnThis.intResultIndex);
        System.out.println("Beispiele (strict):");
        printMarked(strSE, pgnThis.intStrictResultIndex);

        // From here on strPattern is the pattern found for the strict examples.
        pgnThis.generateExample(strPattern, 1);
        String[] strGp = pgnThis.getExample();
        String[] strGn = pgnThis.generateCounterExample(strPattern, bnsThis.getPattern(), 3);

        System.out.println("Guess:");
        for (int i = 0; i < strGp.length; i++) System.out.println(strGp[i]);
        for (int i = 0; i < strGn.length; i++) System.out.println(strGn[i]);
    }
}
