1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
|
23 | |
|
24 | |
|
25 | |
|
26 | |
|
27 | |
|
28 | |
package org.omegat.filters2.latex; |
29 | |
|
30 | |
import java.io.BufferedReader; |
31 | |
import java.io.BufferedWriter; |
32 | |
import java.io.IOException; |
33 | |
import java.io.Writer; |
34 | |
import java.util.Iterator; |
35 | |
import java.util.LinkedList; |
36 | |
import java.util.ListIterator; |
37 | |
import java.util.regex.Matcher; |
38 | |
import java.util.regex.Pattern; |
39 | |
|
40 | |
import org.omegat.filters2.AbstractFilter; |
41 | |
import org.omegat.filters2.Instance; |
42 | |
import org.omegat.util.OStrings; |
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | 582348276 | public class LatexFilter extends AbstractFilter { |
54 | |
|
55 | |
public String getFileFormatName() { |
56 | 13064848 | return OStrings.getString("LATEXFILTER_FILTER_NAME"); |
57 | |
} |
58 | |
|
59 | |
public Instance[] getDefaultInstances() { |
60 | 569283428 | return new Instance[] { new Instance("*.tex"), new Instance("*.latex"), }; |
61 | |
} |
62 | |
|
63 | |
public boolean isSourceEncodingVariable() { |
64 | 0 | return true; |
65 | |
} |
66 | |
|
67 | |
public boolean isTargetEncodingVariable() { |
68 | 0 | return true; |
69 | |
} |
70 | |
|
71 | |
public void processFile(BufferedReader in, BufferedWriter out) throws IOException { |
72 | |
|
73 | 0 | in.mark(1); |
74 | 0 | int ch = in.read(); |
75 | 0 | if (ch != 0xFEFF) |
76 | 0 | in.reset(); |
77 | |
|
78 | 0 | init(); |
79 | |
|
80 | 0 | processLatexFile(in, out); |
81 | 0 | } |
82 | |
|
83 | |
private int findStringCategory(String c) { |
84 | 0 | if (c.equals("\\")) { |
85 | 0 | return 0; |
86 | 0 | } else if (c.equals("{")) { |
87 | 0 | return 1; |
88 | 0 | } else if (c.equals("}")) { |
89 | 0 | return 2; |
90 | 0 | } else if (c.equals("$")) { |
91 | 0 | return 3; |
92 | 0 | } else if (c.equals("&")) { |
93 | 0 | return 4; |
94 | 0 | } else if (c.equals("\n")) { |
95 | 0 | return 5; |
96 | 0 | } else if (c.equals("#")) { |
97 | 0 | return 6; |
98 | 0 | } else if (c.equals("^")) { |
99 | 0 | return 7; |
100 | 0 | } else if (c.equals("_")) { |
101 | 0 | return 8; |
102 | 0 | } else if (c.equals("\000")) { |
103 | 0 | return 9; |
104 | 0 | } else if (c.matches("[ \t]")) { |
105 | 0 | return 10; |
106 | 0 | } else if (c.matches("[a-zA-Z]")) { |
107 | 0 | return 11; |
108 | 0 | } else if (c.equals("~")) { |
109 | 0 | return 13; |
110 | 0 | } else if (c.equals("%")) { |
111 | 0 | return 14; |
112 | |
} |
113 | |
|
114 | 0 | return 12; |
115 | |
} |
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
|
121 | |
|
122 | |
|
123 | |
|
124 | |
|
125 | |
|
126 | |
private void processLatexFile(BufferedReader in, Writer out) throws IOException { |
127 | 0 | StringBuffer par = new StringBuffer(); |
128 | |
String s; |
129 | |
|
130 | 0 | LinkedList<String> commands = new LinkedList<String>(); |
131 | |
|
132 | |
|
133 | |
|
134 | |
|
135 | |
|
136 | |
String state; |
137 | 0 | while ((s = in.readLine()) != null) { |
138 | 0 | String[] c = s.split(""); |
139 | 0 | state = "N"; |
140 | |
|
141 | 0 | int idx = 1; |
142 | 0 | while (idx < c.length) { |
143 | 0 | String cidx = c[idx]; |
144 | 0 | int cat = findStringCategory(cidx); |
145 | |
|
146 | 0 | if (cat == 0) { |
147 | |
|
148 | 0 | StringBuffer cmd = new StringBuffer(); |
149 | 0 | cmd.append(cidx); |
150 | 0 | idx++; |
151 | 0 | while (idx < c.length) { |
152 | 0 | String cmdc = c[idx]; |
153 | 0 | if (findStringCategory(cmdc) == 11) { |
154 | 0 | cmd.append(cmdc); |
155 | 0 | } else if (cmd.length() == 1) { |
156 | 0 | cmd.append(cmdc); |
157 | 0 | state = "M"; |
158 | 0 | break; |
159 | |
} else { |
160 | 0 | idx--; |
161 | |
|
162 | 0 | state = "M"; |
163 | 0 | break; |
164 | |
} |
165 | 0 | idx++; |
166 | 0 | } |
167 | |
|
168 | 0 | if (!commands.contains(cmd.toString())) |
169 | 0 | commands.add(cmd.toString()); |
170 | 0 | par.append(cmd); |
171 | 0 | } else if (cat == 4) { |
172 | |
|
173 | 0 | out.write(processParagraph(commands, par.toString())); |
174 | 0 | out.write("&"); |
175 | 0 | par.setLength(0); |
176 | |
|
177 | 0 | commands.clear(); |
178 | 0 | } else if (cat == 10) { |
179 | 0 | if (state.equals("M")) { |
180 | 0 | state = "S"; |
181 | 0 | par.append(cidx); |
182 | |
} |
183 | 0 | } else if (cat == 14) { |
184 | |
|
185 | 0 | StringBuffer comment = new StringBuffer(); |
186 | 0 | comment.append(cidx); |
187 | 0 | idx++; |
188 | 0 | while (idx < c.length) { |
189 | 0 | String commentc = c[idx]; |
190 | 0 | comment.append(commentc); |
191 | 0 | idx++; |
192 | 0 | } |
193 | |
|
194 | |
|
195 | 0 | } else { |
196 | 0 | state = "M"; |
197 | 0 | par.append(cidx); |
198 | |
} |
199 | |
|
200 | 0 | idx++; |
201 | 0 | } |
202 | |
|
203 | |
|
204 | 0 | if (state.equals("N")) { |
205 | |
|
206 | 0 | out.write(processParagraph(commands, par.toString())); |
207 | 0 | out.write("\n\n"); |
208 | 0 | par.setLength(0); |
209 | |
|
210 | 0 | commands.clear(); |
211 | 0 | } else if (state.equals("M")) { |
212 | 0 | par.append(" "); |
213 | |
} |
214 | 0 | } |
215 | |
|
216 | |
|
217 | 0 | if (par.length() > 0) |
218 | 0 | out.write(processParagraph(commands, par.toString())); |
219 | |
|
220 | 0 | } |
221 | |
|
222 | |
private String substituteUnicode(String par) { |
223 | 0 | par = par.replaceAll("\\\\\\\\", "<br0>"); |
224 | 0 | par = par.replaceAll("\\{?\\\\ss\\}?", "ß"); |
225 | 0 | par = par.replaceAll("\\{?\\\\glqq\\}?(\\{\\})?", "\u301f"); |
226 | 0 | par = par.replaceAll("\\{?\\\\grqq\\}?(\\{\\})?", "\u301d"); |
227 | 0 | par = par.replaceAll("\\{?\\\\glq\\}?(\\{\\})?", "\u201a"); |
228 | 0 | par = par.replaceAll("\\{?\\\\grq\\}?(\\{\\})?", "\u2018"); |
229 | 0 | par = par.replaceAll("\\\\%", "%"); |
230 | 0 | par = par.replaceAll("\\\\-", "\u00ad"); |
231 | 0 | par = par.replaceAll("\\\\,", "\u2009"); |
232 | 0 | par = par.replaceAll("~", "\u00a0"); |
233 | 0 | return par; |
234 | |
} |
235 | |
|
236 | |
private String resubstituteTex(String par) { |
237 | 0 | par = par.replaceAll("\u00a0", "~"); |
238 | 0 | par = par.replaceAll("\u2009", "\\\\,"); |
239 | 0 | par = par.replaceAll("\u00ad", "\\\\-"); |
240 | 0 | par = par.replaceAll("%", "\\\\%"); |
241 | 0 | par = par.replaceAll("<br0>", "\\\\\\\\"); |
242 | 0 | return par; |
243 | |
} |
244 | |
|
245 | 582348276 | private LinkedList<String> oneArgNoText = new LinkedList<String>(); |
246 | 582348276 | private LinkedList<String> oneArgInlineText = new LinkedList<String>(); |
247 | 582348276 | private LinkedList<String> oneArgParText = new LinkedList<String>(); |
248 | |
|
249 | |
private void init() { |
250 | 0 | oneArgNoText.add("\\begin"); |
251 | 0 | oneArgNoText.add("\\end"); |
252 | 0 | oneArgNoText.add("\\cite"); |
253 | 0 | oneArgNoText.add("\\label"); |
254 | 0 | oneArgNoText.add("\\ref"); |
255 | 0 | oneArgNoText.add("\\pageref"); |
256 | 0 | oneArgNoText.add("\\pagestyle"); |
257 | 0 | oneArgNoText.add("\\thispagestyle"); |
258 | 0 | oneArgNoText.add("\\vspace"); |
259 | 0 | oneArgNoText.add("\\hspace"); |
260 | 0 | oneArgNoText.add("\\vskip"); |
261 | 0 | oneArgNoText.add("\\hskip"); |
262 | 0 | oneArgNoText.add("\\put"); |
263 | 0 | oneArgNoText.add("\\includegraphics"); |
264 | 0 | oneArgNoText.add("\\documentclass"); |
265 | 0 | oneArgNoText.add("\\usepackage"); |
266 | |
|
267 | 0 | oneArgInlineText.add("\\emph"); |
268 | 0 | oneArgInlineText.add("\\textbf"); |
269 | 0 | oneArgInlineText.add("\\texttt"); |
270 | 0 | oneArgInlineText.add("\\textsf"); |
271 | 0 | oneArgInlineText.add("\\textit"); |
272 | 0 | oneArgInlineText.add("\\hbox"); |
273 | 0 | oneArgInlineText.add("\\mbox"); |
274 | 0 | oneArgInlineText.add("\\vbox"); |
275 | |
|
276 | 0 | oneArgParText.add("\\typeout"); |
277 | 0 | oneArgParText.add("\\footnote"); |
278 | 0 | oneArgParText.add("\\author"); |
279 | 0 | oneArgParText.add("\\index"); |
280 | 0 | oneArgParText.add("\\title"); |
281 | 0 | oneArgParText.add("\\Chapter"); |
282 | 0 | oneArgParText.add("\\chapter"); |
283 | 0 | oneArgParText.add("\\section"); |
284 | 0 | } |
285 | |
|
286 | |
private String replaceOneArgNoText(LinkedList<String[]> substituted, LinkedList<String> commands, |
287 | |
String par) { |
288 | 0 | int counter = 0; |
289 | |
|
290 | 0 | for (Iterator<String> it = commands.iterator(); it.hasNext();) { |
291 | 0 | String command = it.next(); |
292 | |
|
293 | 0 | StringBuffer sb = new StringBuffer(); |
294 | |
|
295 | 0 | if (oneArgNoText.contains(command)) { |
296 | 0 | String find = ("\\" + command + "\\*?" + "(" + "\\[" + "[^\\]]*" + "\\]" + |
297 | |
|
298 | |
|
299 | |
"|" + "\\(" + "[^\\)]*" + "\\)" + |
300 | |
")?\\s*" + "\\{" + "[^\\}]*+" + "\\}"); |
301 | |
|
302 | 0 | Pattern p = Pattern.compile(find); |
303 | 0 | Matcher m = p.matcher(par); |
304 | 0 | while (m.find()) { |
305 | 0 | String replace = "<n" + String.valueOf(counter) + ">"; |
306 | 0 | String[] subst = { reHarden(m.group(0)), reHarden(replace) }; |
307 | 0 | substituted.addFirst(subst); |
308 | 0 | m.appendReplacement(sb, replace); |
309 | 0 | counter++; |
310 | 0 | } |
311 | 0 | m.appendTail(sb); |
312 | |
|
313 | 0 | par = sb.toString(); |
314 | |
} |
315 | 0 | } |
316 | 0 | return par; |
317 | |
} |
318 | |
|
319 | |
private String replaceOneArgInlineText(LinkedList<String[]> substituted, LinkedList<String> commands, |
320 | |
String par) { |
321 | 0 | int counter = 0; |
322 | |
|
323 | 0 | for (Iterator<String> it = commands.iterator(); it.hasNext();) { |
324 | 0 | String command = it.next(); |
325 | |
|
326 | 0 | StringBuffer sb = new StringBuffer(); |
327 | |
|
328 | 0 | if (oneArgInlineText.contains(command)) { |
329 | 0 | String find = ("(" + "\\" + command + "\\s*" + "\\{" + ")" + "(" + "[^\\}]*+" + ")" + "\\}"); |
330 | |
|
331 | 0 | Pattern p = Pattern.compile(find); |
332 | 0 | Matcher m = p.matcher(par); |
333 | 0 | while (m.find()) { |
334 | 0 | String preReplace = "<i" + String.valueOf(counter) + ">"; |
335 | 0 | String postReplace = "</i" + String.valueOf(counter) + ">"; |
336 | |
|
337 | 0 | String[] s1 = { reHarden(m.group(1)), reHarden(preReplace) }; |
338 | 0 | substituted.addFirst(s1); |
339 | |
|
340 | 0 | String[] s2 = { reHarden("}"), reHarden(postReplace) }; |
341 | 0 | substituted.addFirst(s2); |
342 | |
|
343 | 0 | String replace = (preReplace + "$2" + postReplace); |
344 | 0 | m.appendReplacement(sb, replace); |
345 | 0 | counter++; |
346 | 0 | } |
347 | 0 | m.appendTail(sb); |
348 | |
|
349 | 0 | par = sb.toString(); |
350 | |
} |
351 | 0 | } |
352 | 0 | return par; |
353 | |
} |
354 | |
|
355 | |
private String replaceOneArgParText(LinkedList<String[]> substituted, LinkedList<String> commands, |
356 | |
String par) { |
357 | 0 | int counter = 0; |
358 | |
|
359 | 0 | for (Iterator<String> it = commands.iterator(); it.hasNext();) { |
360 | 0 | String command = it.next(); |
361 | |
|
362 | 0 | StringBuffer sb = new StringBuffer(); |
363 | |
|
364 | 0 | if (oneArgParText.contains(command)) { |
365 | 0 | String find = ("(" + "\\" + command + "\\*?\\s*" + ")" + "\\{" + "(" + "[^\\}]*+" + ")" + "\\}"); |
366 | |
|
367 | 0 | Pattern p = Pattern.compile(find); |
368 | 0 | Matcher m = p.matcher(par); |
369 | 0 | while (m.find()) { |
370 | 0 | String replace = "<p" + String.valueOf(counter) + ">"; |
371 | 0 | String content = ""; |
372 | 0 | if (m.group(2) != null) |
373 | 0 | content = processParagraph(commands, m.group(2)); |
374 | |
|
375 | 0 | String[] subst = { reHarden(m.group(1) + "{" + content + "}"), reHarden(replace) }; |
376 | |
|
377 | 0 | substituted.addFirst(subst); |
378 | 0 | m.appendReplacement(sb, replace); |
379 | 0 | counter++; |
380 | 0 | } |
381 | 0 | m.appendTail(sb); |
382 | |
|
383 | 0 | par = sb.toString(); |
384 | |
} |
385 | 0 | } |
386 | 0 | return par; |
387 | |
} |
388 | |
|
389 | |
private String replaceUnknownCommand(LinkedList<String[]> substituted, LinkedList<String> commands, |
390 | |
String par) { |
391 | 0 | int counter = 0; |
392 | |
|
393 | 0 | for (Iterator<String> it = commands.iterator(); it.hasNext();) { |
394 | 0 | String command = it.next(); |
395 | |
|
396 | 0 | if (command.equals("\\\\") || command.equals("\\{") || command.equals("\\[")) |
397 | |
|
398 | 0 | command = "\\" + command; |
399 | |
|
400 | 0 | StringBuffer sb = new StringBuffer(); |
401 | 0 | String find = "\\" + command; |
402 | |
|
403 | 0 | Pattern p = Pattern.compile(find); |
404 | 0 | Matcher m = p.matcher(par); |
405 | 0 | while (m.find()) { |
406 | 0 | String replace = "<u" + String.valueOf(counter) + ">"; |
407 | 0 | String[] subst = { reHarden(m.group(0)), reHarden(replace) }; |
408 | 0 | substituted.addFirst(subst); |
409 | 0 | m.appendReplacement(sb, replace); |
410 | 0 | counter++; |
411 | 0 | } |
412 | 0 | m.appendTail(sb); |
413 | |
|
414 | 0 | par = sb.toString(); |
415 | 0 | } |
416 | 0 | return par; |
417 | |
} |
418 | |
|
419 | |
private String reHarden(String re) { |
420 | 0 | re = re.replaceAll("\\\\", "\\\\\\\\"); |
421 | 0 | re = re.replaceAll("\\[", "\\\\["); |
422 | 0 | re = re.replaceAll("\\^", "\\\\^"); |
423 | 0 | re = re.replaceAll("\\$", "\\\\\\$"); |
424 | 0 | re = re.replaceAll("\\{", "\\\\{"); |
425 | 0 | return re; |
426 | |
} |
427 | |
|
428 | |
private String processParagraph(LinkedList<String> commands, String par) { |
429 | 0 | LinkedList<String[]> substituted = new LinkedList<String[]>(); |
430 | |
|
431 | 0 | par = substituteUnicode(par); |
432 | |
|
433 | 0 | par = replaceOneArgNoText(substituted, commands, par); |
434 | 0 | par = replaceOneArgInlineText(substituted, commands, par); |
435 | 0 | par = replaceOneArgParText(substituted, commands, par); |
436 | 0 | par = replaceUnknownCommand(substituted, commands, par); |
437 | |
|
438 | 0 | String find = ("^((\\s*</?[nipu]\\d+>\\s*)*)" + "(.*?)" + "((\\s*</?[nipu]\\d+>\\s*)*)$"); |
439 | 0 | Pattern p = Pattern.compile(find); |
440 | 0 | Matcher m = p.matcher(par); |
441 | 0 | if (m.find()) { |
442 | 0 | par = ""; |
443 | 0 | if (m.group(1) != null) |
444 | 0 | par += m.group(1); |
445 | 0 | if (m.group(3) != null) |
446 | 0 | par += processEntry(m.group(3)); |
447 | 0 | if (m.group(4) != null) |
448 | 0 | par += m.group(4); |
449 | |
} |
450 | |
|
451 | 0 | par = resubstituteTex(par); |
452 | |
|
453 | 0 | ListIterator<String[]> it = substituted.listIterator(); |
454 | 0 | while (it.hasNext()) { |
455 | 0 | String[] subst = it.next(); |
456 | 0 | par = par.replaceAll(subst[1], subst[0]); |
457 | 0 | } |
458 | |
|
459 | 0 | return par; |
460 | |
} |
461 | |
|
462 | |
} |