package lkformat2;
import java.io.*;
import java.util.regex.*;
/**
 * Command-line converter that reads one Europarl text file and writes three
 * output files: a text file, a structure file and a raw file.
 *
 * <p>Usage: {@code EuroparlToLK <input> <textOut> <structureOut> <rawOut>}.
 * The base name of the input must look like {@code ep-YY-MM-DD.txt}; the date
 * encoded in it is written into the header of the text output.
 *
 * <p>NOTE(review): many string literals in this class are empty strings, and
 * {@link #printEntity} ignores its offset arguments. This looks like markup
 * was stripped from this copy of the file -- confirm against the upstream
 * version before relying on the exact output format.
 */
public class EuroparlToLK {

    /** Character encoding used for the input file and for all three output files. */
    private static final String ENCODING = "UTF-8";

    // NOTE(review): not referenced anywhere in this class as it stands; kept
    // because it may belong to entity-id generation lost from printEntity -- confirm.
    private static int idCounter = 0;

    /** Shape of an accepted input base name: {@code ep-YY-MM-DD.txt}. */
    private static final Pattern DATE_PATTERN = Pattern.compile("ep-(..)-(..)-(..)\\.txt");

    /**
     * Program entry point.
     *
     * <p>Prints a usage message and exits with status 1 when fewer than four
     * arguments are given. Any failure during conversion is reported with a
     * stack trace and also ends the process with status 1.
     *
     * @param argv input file, text output file, structure output file and
     *             raw output file, in that order
     */
    public static void main(String[] argv) {
        if (argv == null || argv.length < 4) {
            System.err.println("Usage: EuroparlToLK <input> <textOut> <structureOut> <rawOut>");
            System.exit(1);
            return;
        }
        try {
            convert(argv[0], argv[1], argv[2], argv[3]);
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Performs the conversion of one input file into the three output files.
     *
     * @param fileName         path of the input file
     * @param outTextFile      path of the text output
     * @param outStructureFile path of the structure output
     * @param outRawFile       path of the raw output
     * @throws IOException      if a file cannot be opened, read or written
     * @throws RuntimeException if the input base name carries no date
     */
    private static void convert(String fileName, String outTextFile,
            String outStructureFile, String outRawFile) throws IOException {
        // The input is decoded with the same explicit charset as the outputs
        // instead of the platform default, and every stream is closed even
        // when the conversion fails half-way.
        try (BufferedReader br = new BufferedReader(
                     new InputStreamReader(new FileInputStream(fileName), ENCODING));
             PrintWriter textOut = openWriter(outTextFile);
             PrintWriter structOut = openWriter(outStructureFile);
             PrintWriter rawOut = openWriter(outRawFile)) {

            // File.getName() understands the platform's separator, not only '/'.
            String baseName = new File(fileName).getName();
            String isoDate = extractDate(baseName);

            // Header of the text output.
            textOut.println("");
            textOut.println("");
            textOut.println("");
            textOut.println(" " + baseName + "");
            textOut.println(" " + isoDate + "");
            textOut.println("");
            textOut.print("");

            // Header of the structure output.
            structOut.println("");
            structOut.println("");
            structOut.println("");
            structOut.println(" " + outTextFile + "");
            structOut.println(" EuroparlToLK");
            structOut.println("");
            structOut.println("");

            rawOut.println("DUMMY");

            // Running offset into the emitted text: each emitted line adds its
            // length plus one (presumably for the line terminator).
            int position = 0;
            // Integer.MIN_VALUE means "no open span".
            // NOTE(review): nothing in this class ever assigns another value to
            // these two, so the speaker/chapter branches below never fire -- confirm.
            int chStart = Integer.MIN_VALUE;
            int spStart = Integer.MIN_VALUE;

            for (String line = br.readLine(); line != null; line = br.readLine()) {
                line = line.trim();

                if (line.startsWith("<")) {
                    // Markup line: recorded in the structure file only.
                    // The empty prefix matches every string, so the else
                    // branch cannot be reached as written.
                    // NOTE(review): the tag literal looks stripped -- restore it.
                    if (line.startsWith("")) {
                        printEntity("p", position, position, structOut);
                    } else {
                        throw new RuntimeException("line = " + line);
                    }
                    continue;
                }

                // A line opening with '(' ends any open speaker span.
                if (line.startsWith("(") && spStart != Integer.MIN_VALUE) {
                    printEntity("speaker", spStart, position - 1, structOut);
                    spStart = Integer.MIN_VALUE;
                }

                // Re-attach a detached possessive: "' s " becomes "'s ".
                line = line.replace("' s ", "'s ");
                textOut.println(line);
                position += line.length() + 1;
                rawOut.println(line);
            }

            // Close whatever spans are still open at end of input.
            if (spStart != Integer.MIN_VALUE) {
                printEntity("speaker", spStart, position - 1, structOut);
            }
            if (chStart != Integer.MIN_VALUE) {
                printEntity("chapter", chStart, position - 1, structOut);
            }

            // Footers.
            textOut.println("");
            textOut.println("");
            structOut.println("");
            structOut.println("");

            // PrintWriter never throws on write failures; surface them here so
            // a truncated output does not end with a success exit status.
            if (textOut.checkError() || structOut.checkError() || rawOut.checkError()) {
                throw new IOException("Failed writing one of the output files");
            }
        }
    }

    /**
     * Opens a writer on {@code path} that encodes with {@link #ENCODING}.
     *
     * @param path file to create or overwrite
     * @return a writer for that file
     * @throws IOException if the file cannot be opened for writing
     */
    private static PrintWriter openWriter(String path) throws IOException {
        return new PrintWriter(new OutputStreamWriter(new FileOutputStream(path), ENCODING));
    }

    /**
     * Writes one entity line to the structure output: a space followed by the
     * label.
     *
     * @param l     entity label, e.g. {@code "p"} or {@code "speaker"}
     * @param start start offset of the entity; not written by the current code
     * @param end   end offset of the entity; not written by the current code
     * @param out   destination writer
     */
    private static void printEntity(String l, int start, int end,
            PrintWriter out) {
        // NOTE(review): start/end are unused and the trailing literal is empty;
        // the surrounding markup was presumably lost from this copy -- confirm.
        StringBuilder sb = new StringBuilder(" ");
        sb.append(l);
        sb.append("");
        out.println(sb);
    }

    /**
     * Derives a {@code YYYY-MM-DD} date from a base name of the form
     * {@code ep-YY-MM-DD.txt}. A two-character year starting with {@code 9}
     * is prefixed with {@code 19}, any other year with {@code 20}.
     *
     * @param d base name of the input file
     * @return the date as {@code YYYY-MM-DD}
     * @throws RuntimeException if {@code d} does not match the expected shape
     */
    private static String extractDate(String d) {
        Matcher m = DATE_PATTERN.matcher(d);
        if (!m.matches()) {
            throw new RuntimeException("Couldn't extract date");
        }
        String yy = m.group(1);
        String mm = m.group(2);
        String dd = m.group(3);
        String century = yy.startsWith("9") ? "19" : "20";
        return century + yy + "-" + mm + "-" + dd;
    }
}