Coverage Report - org.omegat.filters2.latex.LatexFilter
 
Classes in this File Line Coverage Branch Coverage Complexity
LatexFilter
2%
6/271
0%
0/100
5
 
 1  
 /**************************************************************************
 2  
  OmegaT - Computer Assisted Translation (CAT) tool
 3  
           with fuzzy matching, translation memory, keyword search,
 4  
           glossaries, and translation leveraging into updated projects.
 5  
 
 6  
  Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
 7  
                2006 Thomas Huriaux
 8  
                2008 Martin Fleurke
 9  
                2009 Arno Peters
 10  
                Home page: http://www.omegat.org/
 11  
                Support center: http://groups.yahoo.com/group/OmegaT/
 12  
 
 13  
  This program is free software; you can redistribute it and/or modify
 14  
  it under the terms of the GNU General Public License as published by
 15  
  the Free Software Foundation; either version 2 of the License, or
 16  
  (at your option) any later version.
 17  
 
 18  
  This program is distributed in the hope that it will be useful,
 19  
  but WITHOUT ANY WARRANTY; without even the implied warranty of
 20  
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 21  
  GNU General Public License for more details.
 22  
 
 23  
  You should have received a copy of the GNU General Public License
 24  
  along with this program; if not, write to the Free Software
 25  
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 26  
  **************************************************************************/
 27  
 
 28  
 package org.omegat.filters2.latex;
 29  
 
 30  
 import java.io.BufferedReader;
 31  
 import java.io.BufferedWriter;
 32  
 import java.io.IOException;
 33  
 import java.io.Writer;
 34  
 import java.util.Iterator;
 35  
 import java.util.LinkedList;
 36  
 import java.util.ListIterator;
 37  
 import java.util.regex.Matcher;
 38  
 import java.util.regex.Pattern;
 39  
 
 40  
 import org.omegat.filters2.AbstractFilter;
 41  
 import org.omegat.filters2.Instance;
 42  
 import org.omegat.util.OStrings;
 43  
 
 44  
 /**
 45  
  * Filter to support LaTeX files.
 46  
  * 
 47  
  * @author Keith Godfrey
 48  
  * @author Maxym Mykhalchuk
 49  
  * @author Thomas Huriaux
 50  
  * @author Martin Fleurke
 51  
  * @author Arno Peters
 52  
  */
 53  582348276
 public class LatexFilter extends AbstractFilter {
 54  
 
 55  
     public String getFileFormatName() {
 56  13064848
         return OStrings.getString("LATEXFILTER_FILTER_NAME");
 57  
     }
 58  
 
 59  
     public Instance[] getDefaultInstances() {
 60  569283428
         return new Instance[] { new Instance("*.tex"), new Instance("*.latex"), };
 61  
     }
 62  
 
 63  
     public boolean isSourceEncodingVariable() {
 64  0
         return true;
 65  
     }
 66  
 
 67  
     public boolean isTargetEncodingVariable() {
 68  0
         return true;
 69  
     }
 70  
 
 71  
     public void processFile(BufferedReader in, BufferedWriter out) throws IOException {
 72  
         // BOM (byte order mark) bugfix
 73  0
         in.mark(1);
 74  0
         int ch = in.read();
 75  0
         if (ch != 0xFEFF)
 76  0
             in.reset();
 77  
 
 78  0
         init();
 79  
 
 80  0
         processLatexFile(in, out);
 81  0
     }
 82  
 
 83  
     private int findStringCategory(String c) {
 84  0
         if (c.equals("\\")) {
 85  0
             return 0;
 86  0
         } else if (c.equals("{")) {
 87  0
             return 1;
 88  0
         } else if (c.equals("}")) {
 89  0
             return 2;
 90  0
         } else if (c.equals("$")) {
 91  0
             return 3;
 92  0
         } else if (c.equals("&")) {
 93  0
             return 4;
 94  0
         } else if (c.equals("\n")) {
 95  0
             return 5;
 96  0
         } else if (c.equals("#")) {
 97  0
             return 6;
 98  0
         } else if (c.equals("^")) {
 99  0
             return 7;
 100  0
         } else if (c.equals("_")) {
 101  0
             return 8;
 102  0
         } else if (c.equals("\000")) {
 103  0
             return 9;
 104  0
         } else if (c.matches("[ \t]")) {
 105  0
             return 10;
 106  0
         } else if (c.matches("[a-zA-Z]")) {
 107  0
             return 11;
 108  0
         } else if (c.equals("~")) {
 109  0
             return 13;
 110  0
         } else if (c.equals("%")) {
 111  0
             return 14;
 112  
         }
 113  
 
 114  0
         return 12;
 115  
     }
 116  
 
 117  
     /**
 118  
      * Processes a LaTeX document
 119  
      * 
 120  
      * @param in
 121  
      *            Source document
 122  
      * @param out
 123  
      *            Target document
 124  
      * @throws java.io.IOException
 125  
      */
 126  
     private void processLatexFile(BufferedReader in, Writer out) throws IOException {
 127  0
         StringBuffer par = new StringBuffer();
 128  
         String s;
 129  
 
 130  0
         LinkedList<String> commands = new LinkedList<String>();
 131  
 
 132  
         /**
 133  
          * Possible states: N: beginning of a new line M: middle S: skipping
 134  
          * blanks
 135  
          */
 136  
         String state;
 137  0
         while ((s = in.readLine()) != null) {
 138  0
             String[] c = s.split("");
 139  0
             state = "N";
 140  
 
 141  0
             int idx = 1;
 142  0
             while (idx < c.length) {
 143  0
                 String cidx = c[idx];
 144  0
                 int cat = findStringCategory(cidx);
 145  
 
 146  0
                 if (cat == 0) {
 147  
                     /* parse control sequence */
 148  0
                     StringBuffer cmd = new StringBuffer();
 149  0
                     cmd.append(cidx);
 150  0
                     idx++;
 151  0
                     while (idx < c.length) {
 152  0
                         String cmdc = c[idx];
 153  0
                         if (findStringCategory(cmdc) == 11) {
 154  0
                             cmd.append(cmdc);
 155  0
                         } else if (cmd.length() == 1) {
 156  0
                             cmd.append(cmdc);
 157  0
                             state = "M";
 158  0
                             break;
 159  
                         } else {
 160  0
                             idx--;
 161  
                             // state = "S";
 162  0
                             state = "M";
 163  0
                             break;
 164  
                         }
 165  0
                         idx++;
 166  0
                     }
 167  
 
 168  0
                     if (!commands.contains(cmd.toString()))
 169  0
                         commands.add(cmd.toString());
 170  0
                     par.append(cmd);
 171  0
                 } else if (cat == 4) {
 172  
                     /* table column separator */
 173  0
                     out.write(processParagraph(commands, par.toString()));
 174  0
                     out.write("&");
 175  0
                     par.setLength(0);
 176  
                     // System.out.println(commands);
 177  0
                     commands.clear();
 178  0
                 } else if (cat == 10) {
 179  0
                     if (state.equals("M")) {
 180  0
                         state = "S";
 181  0
                         par.append(cidx);
 182  
                     }
 183  0
                 } else if (cat == 14) {
 184  
                     /* parse comment */
 185  0
                     StringBuffer comment = new StringBuffer();
 186  0
                     comment.append(cidx);
 187  0
                     idx++;
 188  0
                     while (idx < c.length) {
 189  0
                         String commentc = c[idx];
 190  0
                         comment.append(commentc);
 191  0
                         idx++;
 192  0
                     }
 193  
                     // state = "N";
 194  
                     // out.write("\n");
 195  0
                 } else {
 196  0
                     state = "M";
 197  0
                     par.append(cidx);
 198  
                 }
 199  
 
 200  0
                 idx++;
 201  0
             }
 202  
 
 203  
             /* at the end of the line */
 204  0
             if (state.equals("N")) {
 205  
                 /* \par */
 206  0
                 out.write(processParagraph(commands, par.toString()));
 207  0
                 out.write("\n\n");
 208  0
                 par.setLength(0);
 209  
                 // System.out.println(commands);
 210  0
                 commands.clear();
 211  0
             } else if (state.equals("M")) {
 212  0
                 par.append(" ");
 213  
             }
 214  0
         }
 215  
 
 216  
         // output remaining buffers
 217  0
         if (par.length() > 0)
 218  0
             out.write(processParagraph(commands, par.toString()));
 219  
 
 220  0
     }
 221  
 
 222  
     private String substituteUnicode(String par) {
 223  0
         par = par.replaceAll("\\\\\\\\", "<br0>");
 224  0
         par = par.replaceAll("\\{?\\\\ss\\}?", "ß");
 225  0
         par = par.replaceAll("\\{?\\\\glqq\\}?(\\{\\})?", "\u301f");
 226  0
         par = par.replaceAll("\\{?\\\\grqq\\}?(\\{\\})?", "\u301d");
 227  0
         par = par.replaceAll("\\{?\\\\glq\\}?(\\{\\})?", "\u201a");
 228  0
         par = par.replaceAll("\\{?\\\\grq\\}?(\\{\\})?", "\u2018");
 229  0
         par = par.replaceAll("\\\\%", "%");
 230  0
         par = par.replaceAll("\\\\-", "\u00ad");
 231  0
         par = par.replaceAll("\\\\,", "\u2009");
 232  0
         par = par.replaceAll("~", "\u00a0");
 233  0
         return par;
 234  
     }
 235  
 
 236  
     private String resubstituteTex(String par) {
 237  0
         par = par.replaceAll("\u00a0", "~");
 238  0
         par = par.replaceAll("\u2009", "\\\\,");
 239  0
         par = par.replaceAll("\u00ad", "\\\\-");
 240  0
         par = par.replaceAll("%", "\\\\%");
 241  0
         par = par.replaceAll("<br0>", "\\\\\\\\");
 242  0
         return par;
 243  
     }
 244  
 
 245  582348276
     // Commands whose whole occurrence (name, optional argument and braced
     // argument) is replaced by a single <nN> tag; filled by init().
     private LinkedList<String> oneArgNoText = new LinkedList<String>();
 246  582348276
     // Commands whose opening "name{" and closing "}" are replaced by a pair of
     // <iN>/</iN> tags while the argument stays in the paragraph; filled by init().
     private LinkedList<String> oneArgInlineText = new LinkedList<String>();
 247  582348276
     // Commands whose braced argument is run through processParagraph on its own
     // and whose occurrence is replaced by a <pN> tag; filled by init().
     private LinkedList<String> oneArgParText = new LinkedList<String>();
 248  
 
 249  
     private void init() {
 250  0
         oneArgNoText.add("\\begin");
 251  0
         oneArgNoText.add("\\end");
 252  0
         oneArgNoText.add("\\cite");
 253  0
         oneArgNoText.add("\\label");
 254  0
         oneArgNoText.add("\\ref");
 255  0
         oneArgNoText.add("\\pageref");
 256  0
         oneArgNoText.add("\\pagestyle");
 257  0
         oneArgNoText.add("\\thispagestyle");
 258  0
         oneArgNoText.add("\\vspace");
 259  0
         oneArgNoText.add("\\hspace");
 260  0
         oneArgNoText.add("\\vskip");
 261  0
         oneArgNoText.add("\\hskip");
 262  0
         oneArgNoText.add("\\put");
 263  0
         oneArgNoText.add("\\includegraphics");
 264  0
         oneArgNoText.add("\\documentclass");
 265  0
         oneArgNoText.add("\\usepackage");
 266  
 
 267  0
         oneArgInlineText.add("\\emph");
 268  0
         oneArgInlineText.add("\\textbf");
 269  0
         oneArgInlineText.add("\\texttt");
 270  0
         oneArgInlineText.add("\\textsf");
 271  0
         oneArgInlineText.add("\\textit");
 272  0
         oneArgInlineText.add("\\hbox");
 273  0
         oneArgInlineText.add("\\mbox");
 274  0
         oneArgInlineText.add("\\vbox");
 275  
 
 276  0
         oneArgParText.add("\\typeout");
 277  0
         oneArgParText.add("\\footnote");
 278  0
         oneArgParText.add("\\author");
 279  0
         oneArgParText.add("\\index");
 280  0
         oneArgParText.add("\\title");
 281  0
         oneArgParText.add("\\Chapter");
 282  0
         oneArgParText.add("\\chapter");
 283  0
         oneArgParText.add("\\section");
 284  0
     }
 285  
 
 286  
     private String replaceOneArgNoText(LinkedList<String[]> substituted, LinkedList<String> commands,
 287  
             String par) {
 288  0
         int counter = 0;
 289  
 
 290  0
         for (Iterator<String> it = commands.iterator(); it.hasNext();) {
 291  0
             String command = it.next();
 292  
 
 293  0
             StringBuffer sb = new StringBuffer();
 294  
 
 295  0
             if (oneArgNoText.contains(command)) {
 296  0
                 String find = ("\\" + command + "\\*?" + "(" + "\\[" + "[^\\]]*" + "\\]" + // opt
 297  
                                                                                            // []
 298  
                                                                                            // arg
 299  
                         "|" + "\\(" + "[^\\)]*" + "\\)" + // opt () arg
 300  
                         ")?\\s*" + "\\{" + "[^\\}]*+" + "\\}");
 301  
 
 302  0
                 Pattern p = Pattern.compile(find);
 303  0
                 Matcher m = p.matcher(par);
 304  0
                 while (m.find()) {
 305  0
                     String replace = "<n" + String.valueOf(counter) + ">";
 306  0
                     String[] subst = { reHarden(m.group(0)), reHarden(replace) };
 307  0
                     substituted.addFirst(subst);
 308  0
                     m.appendReplacement(sb, replace);
 309  0
                     counter++;
 310  0
                 }
 311  0
                 m.appendTail(sb);
 312  
 
 313  0
                 par = sb.toString();
 314  
             }
 315  0
         }
 316  0
         return par;
 317  
     }
 318  
 
 319  
     private String replaceOneArgInlineText(LinkedList<String[]> substituted, LinkedList<String> commands,
 320  
             String par) {
 321  0
         int counter = 0;
 322  
 
 323  0
         for (Iterator<String> it = commands.iterator(); it.hasNext();) {
 324  0
             String command = it.next();
 325  
 
 326  0
             StringBuffer sb = new StringBuffer();
 327  
 
 328  0
             if (oneArgInlineText.contains(command)) {
 329  0
                 String find = ("(" + "\\" + command + "\\s*" + "\\{" + ")" + "(" + "[^\\}]*+" + ")" + "\\}");
 330  
 
 331  0
                 Pattern p = Pattern.compile(find);
 332  0
                 Matcher m = p.matcher(par);
 333  0
                 while (m.find()) {
 334  0
                     String preReplace = "<i" + String.valueOf(counter) + ">";
 335  0
                     String postReplace = "</i" + String.valueOf(counter) + ">";
 336  
 
 337  0
                     String[] s1 = { reHarden(m.group(1)), reHarden(preReplace) };
 338  0
                     substituted.addFirst(s1);
 339  
 
 340  0
                     String[] s2 = { reHarden("}"), reHarden(postReplace) };
 341  0
                     substituted.addFirst(s2);
 342  
 
 343  0
                     String replace = (preReplace + "$2" + postReplace);
 344  0
                     m.appendReplacement(sb, replace);
 345  0
                     counter++;
 346  0
                 }
 347  0
                 m.appendTail(sb);
 348  
 
 349  0
                 par = sb.toString();
 350  
             }
 351  0
         }
 352  0
         return par;
 353  
     }
 354  
 
 355  
     private String replaceOneArgParText(LinkedList<String[]> substituted, LinkedList<String> commands,
 356  
             String par) {
 357  0
         int counter = 0;
 358  
 
 359  0
         for (Iterator<String> it = commands.iterator(); it.hasNext();) {
 360  0
             String command = it.next();
 361  
 
 362  0
             StringBuffer sb = new StringBuffer();
 363  
 
 364  0
             if (oneArgParText.contains(command)) {
 365  0
                 String find = ("(" + "\\" + command + "\\*?\\s*" + ")" + "\\{" + "(" + "[^\\}]*+" + ")" + "\\}");
 366  
 
 367  0
                 Pattern p = Pattern.compile(find);
 368  0
                 Matcher m = p.matcher(par);
 369  0
                 while (m.find()) {
 370  0
                     String replace = "<p" + String.valueOf(counter) + ">";
 371  0
                     String content = "";
 372  0
                     if (m.group(2) != null)
 373  0
                         content = processParagraph(commands, m.group(2));
 374  
 
 375  0
                     String[] subst = { reHarden(m.group(1) + "{" + content + "}"), reHarden(replace) };
 376  
 
 377  0
                     substituted.addFirst(subst);
 378  0
                     m.appendReplacement(sb, replace);
 379  0
                     counter++;
 380  0
                 }
 381  0
                 m.appendTail(sb);
 382  
 
 383  0
                 par = sb.toString();
 384  
             }
 385  0
         }
 386  0
         return par;
 387  
     }
 388  
 
 389  
     private String replaceUnknownCommand(LinkedList<String[]> substituted, LinkedList<String> commands,
 390  
             String par) {
 391  0
         int counter = 0;
 392  
 
 393  0
         for (Iterator<String> it = commands.iterator(); it.hasNext();) {
 394  0
             String command = it.next();
 395  
 
 396  0
             if (command.equals("\\\\") || command.equals("\\{") || command.equals("\\["))
 397  
                 // continue;
 398  0
                 command = "\\" + command;
 399  
 
 400  0
             StringBuffer sb = new StringBuffer();
 401  0
             String find = "\\" + command;
 402  
 
 403  0
             Pattern p = Pattern.compile(find);
 404  0
             Matcher m = p.matcher(par);
 405  0
             while (m.find()) {
 406  0
                 String replace = "<u" + String.valueOf(counter) + ">";
 407  0
                 String[] subst = { reHarden(m.group(0)), reHarden(replace) };
 408  0
                 substituted.addFirst(subst);
 409  0
                 m.appendReplacement(sb, replace);
 410  0
                 counter++;
 411  0
             }
 412  0
             m.appendTail(sb);
 413  
 
 414  0
             par = sb.toString();
 415  0
         }
 416  0
         return par;
 417  
     }
 418  
 
 419  
     private String reHarden(String re) {
 420  0
         re = re.replaceAll("\\\\", "\\\\\\\\"); // replace \ with \\
 421  0
         re = re.replaceAll("\\[", "\\\\[");
 422  0
         re = re.replaceAll("\\^", "\\\\^");
 423  0
         re = re.replaceAll("\\$", "\\\\\\$");
 424  0
         re = re.replaceAll("\\{", "\\\\{");
 425  0
         return re;
 426  
     }
 427  
 
 428  
     private String processParagraph(LinkedList<String> commands, String par) {
 429  0
         LinkedList<String[]> substituted = new LinkedList<String[]>();
 430  
 
 431  0
         par = substituteUnicode(par);
 432  
 
 433  0
         par = replaceOneArgNoText(substituted, commands, par);
 434  0
         par = replaceOneArgInlineText(substituted, commands, par);
 435  0
         par = replaceOneArgParText(substituted, commands, par);
 436  0
         par = replaceUnknownCommand(substituted, commands, par);
 437  
 
 438  0
         String find = ("^((\\s*</?[nipu]\\d+>\\s*)*)" + "(.*?)" + "((\\s*</?[nipu]\\d+>\\s*)*)$");
 439  0
         Pattern p = Pattern.compile(find);
 440  0
         Matcher m = p.matcher(par);
 441  0
         if (m.find()) {
 442  0
             par = "";
 443  0
             if (m.group(1) != null)
 444  0
                 par += m.group(1);
 445  0
             if (m.group(3) != null)
 446  0
                 par += processEntry(m.group(3));
 447  0
             if (m.group(4) != null)
 448  0
                 par += m.group(4);
 449  
         }
 450  
 
 451  0
         par = resubstituteTex(par);
 452  
 
 453  0
         ListIterator<String[]> it = substituted.listIterator();
 454  0
         while (it.hasNext()) {
 455  0
             String[] subst = it.next();
 456  0
             par = par.replaceAll(subst[1], subst[0]);
 457  0
         }
 458  
 
 459  0
         return par;
 460  
     }
 461  
 
 462  
 }