-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor XLIFF Import AA and add unittests
This pulls out most of the target injection/alignment code into a separate class for easier testing.
- Loading branch information
Showing
8 changed files
with
520 additions
and
142 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
161 changes: 161 additions & 0 deletions
161
.../src/main/java/com/spartansoftwareinc/ws/autoactions/xliff/XLIFFTargetContentAligner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
package com.spartansoftwareinc.ws.autoactions.xliff; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.ArrayList; | ||
import java.util.Iterator; | ||
import java.util.List; | ||
import java.util.Set; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
import org.apache.log4j.Logger; | ||
|
||
import com.idiominc.wssdk.asset.WSTextSegmentTranslation; | ||
import com.idiominc.wssdk.asset.WSTranslationType; | ||
import com.idiominc.wssdk.component.filter.WSFilter; | ||
import com.spartansoftwareinc.ws.okapi.filters.utils.FilterUtil; | ||
import net.sf.okapi.common.Event; | ||
import net.sf.okapi.common.LocaleId; | ||
import net.sf.okapi.common.resource.ITextUnit; | ||
import net.sf.okapi.common.resource.RawDocument; | ||
import net.sf.okapi.common.resource.Segment; | ||
import net.sf.okapi.common.resource.TextContainer; | ||
import net.sf.okapi.filters.xliff.XLIFFFilter; | ||
|
||
class XLIFFTargetContentAligner { | ||
private static final Logger LOG = Logger | ||
.getLogger(XLIFFTargetContentAligner.class); | ||
|
||
private WSTranslationType injectedTranslationType; | ||
private int nextPlaceholderId = 1; | ||
|
||
public XLIFFTargetContentAligner(WSTranslationType injectedTranslationType) { | ||
this.injectedTranslationType = injectedTranslationType; | ||
} | ||
|
||
// TODO: it would be cool to do some refactoring so this code always | ||
// stayed in sync with OkapiFilterBridge, upon the behavior of | ||
// which this depends. The current behavior of that class is to | ||
// produce one WS text segment for each Okapi Segment object within | ||
// the source TextContainer. | ||
public int alignTargetContent(InputStream xliffStream, String encoding, LocaleId srcLocale, | ||
Iterator<WSTextSegmentTranslation> textSegs) throws IOException { | ||
List<ITextUnit> xliffTus = getEvents(xliffStream, encoding, srcLocale); | ||
int count = 0; | ||
for (ITextUnit xliffTu : xliffTus) { | ||
TextContainer sourceTc = xliffTu.getSource(); | ||
TextContainer targetTc = findFirstTarget(xliffTu); | ||
if (targetTc == null) { | ||
for (@SuppressWarnings("unused") | ||
Segment seg : sourceTc.getSegments()) { | ||
skipSegment(textSegs); | ||
} | ||
continue; | ||
} | ||
for (Segment seg : targetTc.getSegments()) { | ||
boolean injected = injectNextSegment(seg, textSegs); | ||
if (injected) { | ||
count++; | ||
} | ||
} | ||
} | ||
return count; | ||
} | ||
|
||
private List<ITextUnit> getEvents(InputStream is, String encoding, LocaleId srcLocale) throws IOException { | ||
File tempFile = null; | ||
try (XLIFFFilter filter = new XLIFFFilter()) { | ||
// Filter may need multiple passes, so we need to buffer this to a | ||
// temp file | ||
tempFile = FilterUtil.convertContentIntoFile(is, ".xlf"); | ||
RawDocument rd = new RawDocument(tempFile.toURI(), encoding, srcLocale, srcLocale); | ||
filter.open(rd, false); | ||
List<ITextUnit> tus = new ArrayList<ITextUnit>(); | ||
while (filter.hasNext()) { | ||
Event e = filter.next(); | ||
if (e.isTextUnit()) { | ||
tus.add(e.getTextUnit()); | ||
} | ||
} | ||
return tus; | ||
} finally { | ||
if (tempFile != null) { | ||
tempFile.delete(); | ||
} | ||
} | ||
} | ||
|
||
TextContainer findFirstTarget(ITextUnit tu) { | ||
Set<LocaleId> locales = tu.getTargetLocales(); | ||
LocaleId first = locales.iterator().next(); | ||
LOG.debug("First target locale is " + first); | ||
return tu.getTarget(first); | ||
} | ||
|
||
void skipSegment(Iterator<WSTextSegmentTranslation> textSegs) { | ||
checkForMoreWSSegments(textSegs); | ||
WSTextSegmentTranslation textSeg = textSegs.next(); | ||
LOG.info("Skipping segment [" + textSeg.getSource() + "]"); | ||
} | ||
|
||
/** | ||
* Update the next text segment translation with content from the XLIFF. Do nothing | ||
* if the translation was already the same as the XLIFF content. | ||
* @return true if the translation was updated, false if the translation was already | ||
* the same as the XLIFF content. | ||
*/ | ||
boolean injectNextSegment(Segment xliffSeg, Iterator<WSTextSegmentTranslation> textSegs) { | ||
checkForMoreWSSegments(textSegs); | ||
WSTextSegmentTranslation textSeg = textSegs.next(); | ||
|
||
WSTextSegmentData wsMatch = WSTextSegmentData.fromOkapiSegment(xliffSeg); | ||
String text = assignPlaceholderIds(wsMatch.getText()); | ||
if (textSeg.getTarget() == null || !textSeg.getTarget().equals(text)) { | ||
LOG.info("Overwriting existing target=[" + textSeg.getTarget() + "] with new target=[" + text + "]"); | ||
textSeg.setTarget(text); | ||
textSeg.setTranslationType(injectedTranslationType); | ||
return true; | ||
} else { | ||
return false; | ||
} | ||
} | ||
|
||
Pattern PH_PATTERN = Pattern.compile("\\{(\\d+)\\}"); | ||
|
||
private String assignPlaceholderIds(String text) { | ||
StringBuilder sb = new StringBuilder(); | ||
for (int i = 0; i < text.length(); i++) { | ||
char c = text.charAt(i); | ||
if (c == '{') { | ||
String test = text.substring(i); | ||
if (test.startsWith(WSFilter.PLACEHOLDER)) { | ||
sb.append("{").append(Integer.toString(nextPlaceholderId++)).append("}"); | ||
i += WSFilter.PLACEHOLDER.length() - 1; | ||
} | ||
else { | ||
// Escape "fake placeholders" | ||
Matcher m = PH_PATTERN.matcher(test); | ||
if (m.lookingAt()) { | ||
sb.append("\\{").append(m.group(1)).append("\\}"); | ||
i += m.group().length() - 1; | ||
} | ||
else { | ||
sb.append(c); | ||
} | ||
} | ||
} | ||
else { | ||
sb.append(c); | ||
} | ||
} | ||
return sb.toString(); | ||
} | ||
|
||
private void checkForMoreWSSegments(Iterator<WSTextSegmentTranslation> textSegs) { | ||
if (!textSegs.hasNext()) { | ||
throw new IllegalStateException("Source XLIFF contains more segments than asset"); | ||
} | ||
} | ||
} |
26 changes: 26 additions & 0 deletions
26
...liff/src/test/java/com/spartansoftwareinc/ws/autoactions/xliff/WSTextSegmentDataTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package com.spartansoftwareinc.ws.autoactions.xliff; | ||
|
||
import org.junit.Test; | ||
import static org.junit.Assert.*; | ||
|
||
import com.idiominc.wssdk.component.filter.WSFilter; | ||
|
||
import net.sf.okapi.common.resource.Code; | ||
import net.sf.okapi.common.resource.Segment; | ||
import net.sf.okapi.common.resource.TextFragment; | ||
import net.sf.okapi.common.resource.TextFragment.TagType; | ||
|
||
public class WSTextSegmentDataTest { | ||
|
||
@Test | ||
public void fromOkapiSegment() { | ||
TextFragment tf = new TextFragment("Hello "); | ||
tf.append(new Code(TagType.OPENING, "bold", "<b>")); | ||
tf.append("world"); | ||
tf.append(new Code(TagType.CLOSING, "bold", "</b>")); | ||
Segment segment = new Segment("seg1", tf); | ||
WSTextSegmentData data = WSTextSegmentData.fromOkapiSegment(segment); | ||
assertEquals("Hello " + WSFilter.PLACEHOLDER + "world" + WSFilter.PLACEHOLDER, data.getText()); | ||
assertArrayEquals(new String[] { "<b>", "</b>" }, data.getPlaceholders()); | ||
} | ||
} |
Oops, something went wrong.