Refactor XLIFF Import AA and add unit tests
    This pulls out most of the target injection/alignment code into
    a separate class for easier testing.
tingley committed Nov 28, 2016
1 parent 32d250a commit 27b52a8
Showing 8 changed files with 520 additions and 142 deletions.
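The extracted class makes the alignment logic testable without a live WorldServer instance: XLIFFTargetContentAligner only needs an XLIFF input stream, an encoding, a source locale, and an iterator of WSTextSegmentTranslation objects. The sketch below is a rough illustration of that kind of isolated test, not one of the tests added by this commit; it assumes Mockito is on the test classpath, uses a hypothetical test class name and sample XLIFF document, and passes null for the WSTranslationType so it does not depend on any particular translation-type constant.

package com.spartansoftwareinc.ws.autoactions.xliff;

import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;

import org.junit.Test;

import com.idiominc.wssdk.asset.WSTextSegmentTranslation;

import net.sf.okapi.common.LocaleId;

public class XLIFFTargetContentAlignerSketchTest {

    // Minimal XLIFF 1.2 document with one trans-unit that carries a target.
    private static final String XLIFF =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
        "<xliff version=\"1.2\" xmlns=\"urn:oasis:names:tc:xliff:document:1.2\">\n" +
        " <file original=\"hello.txt\" source-language=\"en\" target-language=\"fr\" datatype=\"plaintext\">\n" +
        "  <body>\n" +
        "   <trans-unit id=\"1\">\n" +
        "    <source>Hello world</source>\n" +
        "    <target>Bonjour le monde</target>\n" +
        "   </trans-unit>\n" +
        "  </body>\n" +
        " </file>\n" +
        "</xliff>\n";

    @Test
    public void alignsSingleSegment() throws Exception {
        // A mocked WorldServer segment; Mockito's default of null for
        // getTarget() means the aligner should overwrite it with the XLIFF target.
        WSTextSegmentTranslation wsSeg = mock(WSTextSegmentTranslation.class);

        // Passing null for the injected WSTranslationType keeps the sketch free
        // of assumptions about specific translation-type constants.
        XLIFFTargetContentAligner aligner = new XLIFFTargetContentAligner(null);
        InputStream is = new ByteArrayInputStream(XLIFF.getBytes(StandardCharsets.UTF_8));
        int count = aligner.alignTargetContent(is, "UTF-8", LocaleId.ENGLISH,
                Collections.singletonList(wsSeg).iterator());

        assertEquals(1, count);
        verify(wsSeg).setTarget("Bonjour le monde");
    }
}

With no existing target on the mocked segment, the aligner should report one injected segment and copy the XLIFF target text onto it.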
7 changes: 7 additions & 0 deletions autoactions/xliff/pom.xml
@@ -19,6 +19,13 @@
<artifactId>okapi-ws-filters-xliff</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.spartansoftwareinc.ws.okapi.filters</groupId>
<artifactId>okapi-ws-filters-base</artifactId>
<version>${project.version}</version>
<scope>test</scope>
<type>test-jar</type>
</dependency>
</dependencies>

<build>
@@ -1,27 +1,14 @@
package com.spartansoftwareinc.ws.autoactions.xliff;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.sf.okapi.common.Event;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.filters.xliff.XLIFFFilter;

import org.apache.log4j.Logger;

import com.idiominc.wssdk.WSContext;
import com.idiominc.wssdk.WSException;
import com.idiominc.wssdk.WSRuntimeException;
import com.idiominc.wssdk.ais.WSNode;
import com.idiominc.wssdk.asset.WSAssetTask;
import com.idiominc.wssdk.asset.WSAssetTranslation;
@@ -31,7 +18,6 @@
import com.idiominc.wssdk.component.WSParameterFactory;
import com.idiominc.wssdk.component.autoaction.WSActionResult;
import com.idiominc.wssdk.component.autoaction.WSTaskAutomaticAction;
import com.idiominc.wssdk.component.filter.WSFilter;
import com.idiominc.wssdk.workflow.WSTask;
import com.spartansoftwareinc.ws.okapi.Version;
import com.spartansoftwareinc.ws.okapi.filters.utils.FilterUtil;
@@ -50,7 +36,6 @@ public class ImportXLIFFTargetsAutomaticAction extends WSTaskAutomaticAction {

private AssetType assetType;
private WSTranslationType injectedTranslationType;
private int nextPlaceholderId = 1;

@Override
public String getDescription() {
@@ -93,135 +78,21 @@ protected WSNode getAsset(WSAssetTask assetTask) {
}
}

// TODO: it would be cool to do some refactoring so this code always
// stayed in sync with OkapiFilterBridge, upon the behavior of
// which this depends. The current behavior of that class is to
// produce one WS text segment for each Okapi Segment object within
// the source TextContainer.
int injectTargetContent(WSNode node, WSAssetTranslation translation)
throws WSException {
List<ITextUnit> xliffTus = getEvents(node);
LocaleId okapiSrcLocale = FilterUtil.getOkapiLocaleId(node);
String encoding = node.getEncoding() != null ?
node.getEncoding() : XLIFFWSOkapiFilter.DEFAULT_XLIFF_ENCODING;
@SuppressWarnings("unchecked")
Iterator<WSTextSegmentTranslation> textSegs = (Iterator<WSTextSegmentTranslation>)translation.textSegmentIterator();
int count = 0;
for (ITextUnit xliffTu : xliffTus) {
TextContainer sourceTc = xliffTu.getSource();
TextContainer targetTc = findFirstTarget(xliffTu);
if (targetTc == null) {
for (@SuppressWarnings("unused")
Segment seg : sourceTc.getSegments()) {
skipSegment(textSegs);
}
continue;
}
for (Segment seg : targetTc.getSegments()) {
boolean injected = injectNextSegment(seg, textSegs);
if (injected) {
count++;
}
}
XLIFFTargetContentAligner aligner = new XLIFFTargetContentAligner(injectedTranslationType);
try {
int count = aligner.alignTargetContent(node.getInputStream(), encoding, okapiSrcLocale, textSegs);
LOG.info("Imported " + count + " segment translations");
return count;
}
LOG.info("Imported " + count + " segment translations");
return count;
}

TextContainer findFirstTarget(ITextUnit tu) {
Set<LocaleId> locales = tu.getTargetLocales();
LocaleId first = locales.iterator().next();
LOG.debug("First target locale is " + first);
return tu.getTarget(first);
}

void skipSegment(Iterator<WSTextSegmentTranslation> textSegs) {
checkForMoreWSSegments(textSegs);
WSTextSegmentTranslation textSeg = textSegs.next();
LOG.info("Skipping segment [" + textSeg.getSource() + "]");
}

/**
* Update the next text segment translation with content from the XLIFF. Do nothing
* if the translation was already the same as the XLIFF content.
* @return true if the translation was updated, false if the translation was already
* the same as the XLIFF content.
*/
boolean injectNextSegment(Segment xliffSeg, Iterator<WSTextSegmentTranslation> textSegs) {
checkForMoreWSSegments(textSegs);
WSTextSegmentTranslation textSeg = textSegs.next();

WSTextSegmentData wsMatch = WSTextSegmentData.fromOkapiSegment(xliffSeg);
String text = assignPlaceholderIds(wsMatch.getText());
if (textSeg.getTarget() == null || !textSeg.getTarget().equals(text)) {
LOG.info("Overwriting existing target=[" + textSeg.getTarget() + "] with new target=[" + text + "]");
textSeg.setTarget(text);
textSeg.setTranslationType(injectedTranslationType);
return true;
} else {
return false;
}
}

Pattern PH_PATTERN = Pattern.compile("\\{(\\d+)\\}");

private String assignPlaceholderIds(String text) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i);
if (c == '{') {
String test = text.substring(i);
if (test.startsWith(WSFilter.PLACEHOLDER)) {
sb.append("{").append(Integer.toString(nextPlaceholderId++)).append("}");
i += WSFilter.PLACEHOLDER.length() - 1;
}
else {
// Escape "fake placeholders"
Matcher m = PH_PATTERN.matcher(test);
if (m.lookingAt()) {
sb.append("\\{").append(m.group(1)).append("\\}");
i += m.group().length() - 1;
}
else {
sb.append(c);
}
}
}
else {
sb.append(c);
}
}
return sb.toString();
}

private void checkForMoreWSSegments(Iterator<WSTextSegmentTranslation> textSegs) {
if (!textSegs.hasNext()) {
throw new IllegalStateException("Source XLIFF contains more segments than asset");
}
}

private List<ITextUnit> getEvents(WSNode node) throws WSException {
LocaleId okapiSrcLocale = FilterUtil.getOkapiLocaleId(node);
File tempFile = null;
try (XLIFFFilter filter = new XLIFFFilter()) {
// Filter may need multiple passes, so we need to buffer this to a
// temp file
tempFile = FilterUtil.convertAisContentIntoFile(node);
String encoding = node.getEncoding() != null ?
node.getEncoding() : XLIFFWSOkapiFilter.DEFAULT_XLIFF_ENCODING;
RawDocument rd = new RawDocument(tempFile.toURI(), encoding, okapiSrcLocale, okapiSrcLocale);
filter.open(rd, false);
List<ITextUnit> tus = new ArrayList<ITextUnit>();
while (filter.hasNext()) {
Event e = filter.next();
if (e.isTextUnit()) {
tus.add(e.getTextUnit());
}
}
return tus;
} catch (IOException e) {
throw new WSException(e);
} finally {
if (tempFile != null) {
tempFile.delete();
}
catch (IOException e) {
throw new WSRuntimeException(e);
}
}

@@ -0,0 +1,161 @@
package com.spartansoftwareinc.ws.autoactions.xliff;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import com.idiominc.wssdk.asset.WSTextSegmentTranslation;
import com.idiominc.wssdk.asset.WSTranslationType;
import com.idiominc.wssdk.component.filter.WSFilter;
import com.spartansoftwareinc.ws.okapi.filters.utils.FilterUtil;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.filters.xliff.XLIFFFilter;

class XLIFFTargetContentAligner {
private static final Logger LOG = Logger
.getLogger(XLIFFTargetContentAligner.class);

private WSTranslationType injectedTranslationType;
private int nextPlaceholderId = 1;

public XLIFFTargetContentAligner(WSTranslationType injectedTranslationType) {
this.injectedTranslationType = injectedTranslationType;
}

// TODO: it would be cool to do some refactoring so this code always
// stayed in sync with OkapiFilterBridge, upon the behavior of
// which this depends. The current behavior of that class is to
// produce one WS text segment for each Okapi Segment object within
// the source TextContainer.
public int alignTargetContent(InputStream xliffStream, String encoding, LocaleId srcLocale,
Iterator<WSTextSegmentTranslation> textSegs) throws IOException {
List<ITextUnit> xliffTus = getEvents(xliffStream, encoding, srcLocale);
int count = 0;
for (ITextUnit xliffTu : xliffTus) {
TextContainer sourceTc = xliffTu.getSource();
TextContainer targetTc = findFirstTarget(xliffTu);
if (targetTc == null) {
for (@SuppressWarnings("unused")
Segment seg : sourceTc.getSegments()) {
skipSegment(textSegs);
}
continue;
}
for (Segment seg : targetTc.getSegments()) {
boolean injected = injectNextSegment(seg, textSegs);
if (injected) {
count++;
}
}
}
return count;
}

private List<ITextUnit> getEvents(InputStream is, String encoding, LocaleId srcLocale) throws IOException {
File tempFile = null;
try (XLIFFFilter filter = new XLIFFFilter()) {
// Filter may need multiple passes, so we need to buffer this to a
// temp file
tempFile = FilterUtil.convertContentIntoFile(is, ".xlf");
RawDocument rd = new RawDocument(tempFile.toURI(), encoding, srcLocale, srcLocale);
filter.open(rd, false);
List<ITextUnit> tus = new ArrayList<ITextUnit>();
while (filter.hasNext()) {
Event e = filter.next();
if (e.isTextUnit()) {
tus.add(e.getTextUnit());
}
}
return tus;
} finally {
if (tempFile != null) {
tempFile.delete();
}
}
}

TextContainer findFirstTarget(ITextUnit tu) {
Set<LocaleId> locales = tu.getTargetLocales();
LocaleId first = locales.iterator().next();
LOG.debug("First target locale is " + first);
return tu.getTarget(first);
}

void skipSegment(Iterator<WSTextSegmentTranslation> textSegs) {
checkForMoreWSSegments(textSegs);
WSTextSegmentTranslation textSeg = textSegs.next();
LOG.info("Skipping segment [" + textSeg.getSource() + "]");
}

/**
* Update the next text segment translation with content from the XLIFF. Do nothing
* if the translation was already the same as the XLIFF content.
* @return true if the translation was updated, false if the translation was already
* the same as the XLIFF content.
*/
boolean injectNextSegment(Segment xliffSeg, Iterator<WSTextSegmentTranslation> textSegs) {
checkForMoreWSSegments(textSegs);
WSTextSegmentTranslation textSeg = textSegs.next();

WSTextSegmentData wsMatch = WSTextSegmentData.fromOkapiSegment(xliffSeg);
String text = assignPlaceholderIds(wsMatch.getText());
if (textSeg.getTarget() == null || !textSeg.getTarget().equals(text)) {
LOG.info("Overwriting existing target=[" + textSeg.getTarget() + "] with new target=[" + text + "]");
textSeg.setTarget(text);
textSeg.setTranslationType(injectedTranslationType);
return true;
} else {
return false;
}
}

Pattern PH_PATTERN = Pattern.compile("\\{(\\d+)\\}");

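/**
 * Replaces each WSFilter.PLACEHOLDER token in the segment text with a
 * sequentially numbered placeholder ("{1}", "{2}", ...); numbering continues
 * across segments. Literal "{n}" sequences already present in the content are
 * escaped as "\{n\}" so they are not mistaken for real placeholders.
 */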
private String assignPlaceholderIds(String text) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i);
if (c == '{') {
String test = text.substring(i);
if (test.startsWith(WSFilter.PLACEHOLDER)) {
sb.append("{").append(Integer.toString(nextPlaceholderId++)).append("}");
i += WSFilter.PLACEHOLDER.length() - 1;
}
else {
// Escape "fake placeholders"
Matcher m = PH_PATTERN.matcher(test);
if (m.lookingAt()) {
sb.append("\\{").append(m.group(1)).append("\\}");
i += m.group().length() - 1;
}
else {
sb.append(c);
}
}
}
else {
sb.append(c);
}
}
return sb.toString();
}

private void checkForMoreWSSegments(Iterator<WSTextSegmentTranslation> textSegs) {
if (!textSegs.hasNext()) {
throw new IllegalStateException("Source XLIFF contains more segments than asset");
}
}
}
@@ -0,0 +1,26 @@
package com.spartansoftwareinc.ws.autoactions.xliff;

import org.junit.Test;
import static org.junit.Assert.*;

import com.idiominc.wssdk.component.filter.WSFilter;

import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextFragment.TagType;

public class WSTextSegmentDataTest {

@Test
public void fromOkapiSegment() {
TextFragment tf = new TextFragment("Hello ");
tf.append(new Code(TagType.OPENING, "bold", "<b>"));
tf.append("world");
tf.append(new Code(TagType.CLOSING, "bold", "</b>"));
Segment segment = new Segment("seg1", tf);
WSTextSegmentData data = WSTextSegmentData.fromOkapiSegment(segment);
assertEquals("Hello " + WSFilter.PLACEHOLDER + "world" + WSFilter.PLACEHOLDER, data.getText());
assertArrayEquals(new String[] { "<b>", "</b>" }, data.getPlaceholders());
}
}