/*
Bonsai
Copyright (C) 2003 Bj\"orn Hoffmeister

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

package de.uni_luebeck.tcs.demo.datamining;

import java.io.*;
import java.util.*;
import java.util.regex.*;

/**
 * Extracts a sequence of characters that occurs, in the same order, in every
 * one of a set of input strings, and renders it both as a human readable
 * "skeleton" and as a regular expression.
 *
 * <p>Input lines are trimmed and lower-cased; empty lines are ignored. The
 * skeleton is built greedily: in each round the character is chosen whose next
 * occurrence leaves the largest number of unread characters in the string
 * where it leaves the fewest, and every string is advanced past that
 * occurrence. The search stops as soon as no character can be found in the
 * unread rest of every string.
 *
 * <p>In the result string the chosen characters appear upper-cased and every
 * gap (characters that had to be skipped in at least one string) is written
 * as a lower-case {@code x}; in the regular expression a gap is {@code .*}.
 *
 * <p>An instance stores the outcome of the most recent {@code process} call
 * in its fields. No synchronisation is performed.
 */
public class Bonsai {
    /** Expected number of distinct characters; only used to presize hash tables. */
    private static final int ALPHABET_SIZE = 64;

    /** Characters that need a leading backslash to be matched literally by a regex. */
    private static final String REGEX_META = "\\^$.|?*+()[]{}";

    /** Mutable int holder so that a map value can be updated in place. */
    private static final class Int {
        int intValue;
    }

    /** Normalised input: trimmed, lower-cased, non-empty lines. */
    private String[] strData;
    /** Length of the shortest entry of {@link #strData}; 0 if there is no data. */
    private int intMinLength;
    /**
     * One map per input string, from Character to an int[] "next occurrence"
     * table: entry p is the smallest index &gt;= p at which the character
     * occurs, or Integer.MAX_VALUE if there is none. For "abca" the table of
     * 'a' is {0, 3, 3, 3, MAX_VALUE}.
     */
    private HashMap[] hmpPos;
    /** All distinct characters of all input strings, in HashSet iteration order. */
    private Character[] chaAlphabet;
    /**
     * Row k holds, for every input string, the index of the k-th chosen
     * character; a final row filled with Integer.MAX_VALUE acts as sentinel.
     */
    private int[][] intResultIndex;
    /** Skeleton: chosen characters upper-cased, gaps as 'x'. */
    private String strResult;
    /** The tokens of {@link #strResult} between the 'x' gap markers. */
    private String[] strPattern;
    /** Regular expression equivalent of {@link #strResult}. */
    private Pattern patResult;
    /** For each character of {@link #strResult}: true = chosen character, false = gap. */
    private boolean[] blnResult;

    /**
     * Reads all lines from the reader, normalises them and stores the
     * non-empty ones as input data. The reader is not closed.
     *
     * @param bfrIn source of the input lines
     * @throws IOException if reading a line fails
     */
    private void read(BufferedReader bfrIn) throws IOException {
        final ArrayList altIn = new ArrayList(16);
        String strIn;
        while ((strIn = bfrIn.readLine()) != null) {
            strIn = strIn.trim().toLowerCase();
            if (strIn.length() > 0) {
                altIn.add(strIn);
            }
        }
        buildData(altIn);
    }

    /**
     * Normalises the given strings and stores the non-empty ones as input
     * data; {@code null} elements are skipped.
     *
     * @param staIn the raw input strings
     */
    private void read(String[] staIn) {
        ArrayList altIn = new ArrayList(staIn.length);
        for (int i = 0; i < staIn.length; i++) {
            if (staIn[i] != null) {
                String strIn = staIn[i].trim().toLowerCase();
                if (strIn.length() > 0) {
                    altIn.add(strIn);
                }
            }
        }
        buildData(altIn);
    }

    /**
     * Copies the list of normalised strings into {@link #strData} and
     * determines the length of the shortest one.
     *
     * @param altIn list of non-empty Strings
     */
    private final void buildData(ArrayList altIn) {
        strData = (String[]) altIn.toArray(new String[altIn.size()]);
        int intShortest = Integer.MAX_VALUE;
        for (int i = 0; i < strData.length; i++) {
            if (strData[i].length() < intShortest) {
                intShortest = strData[i].length();
            }
        }
        // Without data there is no shortest string. Leaving MAX_VALUE here
        // would overflow the buffer size computations in find().
        intMinLength = (strData.length == 0) ? 0 : intShortest;
    }

    /**
     * Builds the next-occurrence tables ({@link #hmpPos}) for every input
     * string and collects the overall alphabet ({@link #chaAlphabet}).
     */
    private void register() {
        HashSet hstAlphabet = new HashSet(ALPHABET_SIZE * 4 / 3);
        hmpPos = new HashMap[strData.length];

        for (int i = 0; i < strData.length; i++) {
            String strLine = strData[i];
            hmpPos[i] = new HashMap(ALPHABET_SIZE * 4 / 3);
            // Index of the previous occurrence of each character in this string.
            HashMap hmpLast = new HashMap(ALPHABET_SIZE * 4 / 3);

            for (int j = 0; j < strLine.length(); j++) {
                Character chrKey = new Character(strLine.charAt(j));
                hstAlphabet.add(chrKey);

                int[] itaNext = (int[]) hmpPos[i].get(chrKey);
                if (itaNext == null) {
                    // One extra slot so that the position "behind the last
                    // character" can be looked up as well.
                    itaNext = new int[strLine.length() + 1];
                    Arrays.fill(itaNext, Integer.MAX_VALUE);
                    hmpPos[i].put(chrKey, itaNext);
                }
                Int intLast = (Int) hmpLast.get(chrKey);
                if (intLast == null) {
                    intLast = new Int();
                    intLast.intValue = -1;
                    hmpLast.put(chrKey, intLast);
                }

                // Every position after the previous occurrence up to and
                // including j has j as its next occurrence.
                Arrays.fill(itaNext, intLast.intValue + 1, j + 1, j);
                intLast.intValue = j;
            }
        }
        chaAlphabet = (Character[]) hstAlphabet.toArray(new Character[hstAlphabet.size()]);
    }

    /**
     * Appends a character to the regular expression so that it is matched
     * literally, escaping it if it is a regex metacharacter.
     */
    private static void appendLiteral(StringBuffer stbRegEx, char chrLiteral) {
        if (REGEX_META.indexOf(chrLiteral) >= 0) {
            stbRegEx.append('\\');
        }
        stbRegEx.append(chrLiteral);
    }

    /**
     * Runs the greedy selection on the registered data and fills all result
     * fields. With no input data the result is the empty skeleton.
     */
    private void find() {
        StringBuffer stbResult = new StringBuffer(2 * intMinLength + 1);
        StringBuffer stbRegEx = new StringBuffer(3 * intMinLength + 2);
        // At most intMinLength characters can be chosen, with at most one gap
        // before each of them and one trailing gap.
        boolean[] blnType = new boolean[2 * intMinLength + 1];
        int intTypeCount = 0;
        ArrayList altResultIndex = new ArrayList(intMinLength);
        int[] intPos = new int[strData.length];
        // An empty alphabet (no data) offers nothing to choose from.
        boolean blnContinue = chaAlphabet.length > 0;

        while (blnContinue) {
            // intMin[j]: over all strings, the smallest number of characters
            // left unread after the next occurrence of character j; -1 if the
            // character is missing from the unread rest of some string.
            int[] intMin = new int[chaAlphabet.length];
            Arrays.fill(intMin, Integer.MAX_VALUE);
            for (int i = 0; i < strData.length; i++) {
                for (int j = 0; j < chaAlphabet.length; j++) {
                    int[] itaNext = (int[]) hmpPos[i].get(chaAlphabet[j]);
                    if (itaNext == null) {
                        intMin[j] = -1;
                    } else {
                        int intCFB = itaNext[intPos[i]];
                        int intCUE = (intCFB == Integer.MAX_VALUE)
                            ? -1 : strData[i].length() - intCFB - 1;
                        if (intCUE < intMin[j]) {
                            intMin[j] = intCUE;
                        }
                    }
                }
            }

            // Choose the character with the largest minimum; the first one
            // wins on ties.
            int intIndex = 0;
            for (int j = 1; j < intMin.length; j++) {
                if (intMin[j] > intMin[intIndex]) {
                    intIndex = j;
                }
            }
            blnContinue = (intMin[intIndex] != -1);

            if (blnContinue) {
                Character chrWinner = chaAlphabet[intIndex];
                // The character is glued to its predecessor only if no string
                // had to skip anything to reach it.
                boolean blnGlue = true;
                for (int i = 0; i < strData.length; i++) {
                    int intPrev = intPos[i];
                    intPos[i] = ((int[]) hmpPos[i].get(chrWinner))[intPrev] + 1;
                    if (intPos[i] - intPrev > 1) {
                        blnGlue = false;
                    }
                }
                if (!blnGlue) {
                    stbResult.append('x');
                    stbRegEx.append(".*");
                    blnType[intTypeCount++] = false;
                }
                stbResult.append(Character.toUpperCase(chrWinner.charValue()));
                appendLiteral(stbRegEx, chrWinner.charValue());
                blnType[intTypeCount++] = true;
                altResultIndex.add(intPos.clone());
            } else {
                // A trailing gap is needed if any string has unread characters.
                for (int i = 0; i < strData.length; i++) {
                    if (intPos[i] < strData[i].length()) {
                        stbResult.append('x');
                        stbRegEx.append(".*");
                        blnType[intTypeCount++] = false;
                        break;
                    }
                }
            }
        }

        int intChosen = altResultIndex.size();
        intResultIndex = new int[intChosen + 1][];
        for (int k = 0; k < intChosen; k++) {
            intResultIndex[k] = (int[]) altResultIndex.get(k);
            // The stored positions point behind the chosen character.
            for (int i = 0; i < strData.length; i++) {
                intResultIndex[k][i]--;
            }
        }
        // Sentinel row: never equal to a real index, see printResult().
        intResultIndex[intChosen] = new int[strData.length];
        Arrays.fill(intResultIndex[intChosen], Integer.MAX_VALUE);

        strResult = stbResult.toString();
        patResult = Pattern.compile(stbRegEx.toString());
        this.blnResult = new boolean[strResult.length()];
        System.arraycopy(blnType, 0, this.blnResult, 0, this.blnResult.length);

        StringTokenizer stzPattern = new StringTokenizer(strResult, "x");
        this.strPattern = new String[stzPattern.countTokens()];
        for (int i = 0; i < strPattern.length; i++) {
            strPattern[i] = stzPattern.nextToken();
        }
    }

    /**
     * Reads the input lines from the reader and computes the result. The
     * reader is not closed.
     *
     * @param bfrIn source of the input lines
     * @throws IOException if reading fails
     */
    public void process(BufferedReader bfrIn) throws IOException {
        read(bfrIn);
        register();
        find();
    }

    /**
     * Computes the result for the given strings; {@code null} elements and
     * strings that are empty after trimming are ignored.
     *
     * @param strIn the input strings
     */
    public void process(String[] strIn) {
        read(strIn);
        register();
        find();
    }

    /**
     * @return the skeleton: chosen characters upper-cased, gaps as 'x';
     *         {@code null} before the first {@code process} call
     */
    public String getResult() {
        return strResult;
    }

    /**
     * @return the pieces of the skeleton between the gap markers (the
     *         internal array, not a copy)
     */
    public String[] getResultPattern() {
        return strPattern;
    }

    /** @return the regular expression equivalent of the skeleton */
    public Pattern getPattern() {
        return patResult;
    }

    /**
     * @return one flag per character of {@link #getResult()}: true for a
     *         chosen character, false for a gap marker (the internal array,
     *         not a copy)
     */
    public boolean[] getResultType() {
        return blnResult;
    }

    /** @return the normalised input strings (the internal array, not a copy) */
    public String[] getData() {
        return strData;
    }

    /**
     * @return table whose row k holds, per input string, the index of the
     *         k-th chosen character, followed by one row of
     *         Integer.MAX_VALUE (the internal array, not a copy)
     */
    public int[][] getIndexTable() {
        return intResultIndex;
    }

    /**
     * Prints the result to standard output: every input string with the
     * chosen characters upper-cased and a marker telling whether the regular
     * expression matches it, then the skeleton with the regular expression
     * between two dashed lines, then the chosen characters without gaps.
     */
    public void printResult() {
        for (int i = 0; i < strData.length; i++) {
            int k = 0;
            for (int j = 0; j < strData[i].length(); j++) {
                char chrCurrent = strData[i].charAt(j);
                if (j == intResultIndex[k][i]) {
                    System.out.print(Character.toUpperCase(chrCurrent));
                    k++;
                } else {
                    System.out.print(chrCurrent);
                }
            }
            System.out.println(patResult.matcher(strData[i]).matches() ? " (ok)" : " (  )");
        }
        for (int i = 0; i < strResult.length(); i++) {
            System.out.print("-");
        }
        System.out.println("\n" + strResult + " (" + patResult.pattern() + ")");
        for (int i = 0; i < strResult.length(); i++) {
            System.out.print("-");
        }
        System.out.println();
        for (int i = 0; i < blnResult.length; i++) {
            if (blnResult[i]) {
                System.out.print(strResult.charAt(i));
            }
        }
        System.out.println("\n");
    }

    /**
     * Processes the lines of the file named by the first argument and prints
     * the result.
     *
     * @param args command line; args[0] is the input file
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("Usage: java de.uni_luebeck.tcs.demo.datamining.Bonsai <file>");
            System.exit(1);
        }
        Bonsai bonSai = new Bonsai();
        BufferedReader bfrIn = new BufferedReader(new FileReader(args[0]));
        try {
            bonSai.process(bfrIn);
        } finally {
            // Close the file even if reading or processing fails.
            bfrIn.close();
        }
        bonSai.printResult();
    }
}
