Skip to content

Commit

Permalink
eTransl text size limit to 5000, after that use a document-based
Browse files Browse the repository at this point in the history
translation
  • Loading branch information
SrdjanStevanetic committed May 21, 2024
1 parent cf97ff3 commit 984ae09
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpStatus;
import org.apache.http.StatusLine;
Expand Down Expand Up @@ -46,9 +48,14 @@ public class ETranslationTranslationService extends AbstractTranslationService {
public static final String baseUrlTests="base-url-for-testing";
public static final String markupDelimiter="\ndeenPVsaOg\n";//base64 encoded string (as in generateRedisKey()) with new lines
public static final String markupDelimiterWithoutNewline="deenPVsaOg";
// public static final String markupDelimiter="\n[notranslate][/notranslate]\n";//please see docs here: https://language-tools.ec.europa.eu/ (under Developer's Corner, Tips and Tricks)
// public static final String markupDelimiterWithoutNewline="[notranslate][/notranslate]";

public static final String eTranslationErrorCallbackIndicator="eTranslationErrorCallback";
public static final String eTranslationCallbackRelativeUrl="/etranslation/callback";
public static final String eTranslationErrorCallbackRelativeUrl="/etranslation/error-callback";
// Size threshold compared against the String.length() of the joined text to translate:
// texts of this size or longer are handled as a document-based translation, shorter ones as a text snippet.
public static final int eTranslationTextSnippetLimit=5000;
// Smaller threshold that is applied instead when the base url equals baseUrlTests.
public static final int eTranslationTextSnippetLimitTests=200;

public ETranslationTranslationService(String etranslationServiceBaseUrl, String domain, String translationApiBaseUrl, int maxWaitMillisec,
String username, String password, RedisMessageListenerContainer redisMessageListenerContainer) throws TranslationException {
Expand Down Expand Up @@ -122,8 +129,7 @@ public void translate(List<TranslationObj> translationObjs) throws TranslationEx
//baseUrl is different for the integration tests, where the eTranslation service will not be called
if(! baseUrlTests.equals(baseUrl)) {
try {
// String body = createTranslationBodyAsHtmlDocument(eTranslJointStr,translationObjs.get(0).getSourceLang(),translationObjs.get(0).getTargetLang(),eTranslExtRef);
String body = createTranslationBodyWithPlainText(eTranslJointStr,translationObjs.get(0).getSourceLang(),translationObjs.get(0).getTargetLang(),eTranslExtRef);
String body = createTranslationBody(eTranslJointStr,translationObjs.get(0).getSourceLang(),translationObjs.get(0).getTargetLang(),eTranslExtRef);
createHttpRequest(body);
} catch (JSONException | UnsupportedEncodingException e) {
throw new TranslationException("Exception during the eTranslation http request body creation.", 0, e);
Expand All @@ -133,12 +139,23 @@ public void translate(List<TranslationObj> translationObjs) throws TranslationEx
}

//create a redis message listener obj, and wait on that obj until it gets notified by the redis publisher
createRedisMessageListenerAndWaitForResults(translationObjs, eTranslExtRef);
createRedisMessageListenerAndWaitForResults(translationObjs, eTranslExtRef, eTranslJointStr.length());

}

private void createRedisMessageListenerAndWaitForResults(List<TranslationObj> translationObjs, String eTranslExtRef) throws TranslationException {
RedisMessageListener redisMessageListener = new RedisMessageListener();
/**
 * Decides whether a text of the given size is handled as a document-based translation
 * instead of a text-snippet based one.
 *
 * @param textSize the size of the text to translate, as computed by the caller
 * @return true if the size has reached the applicable limit, false otherwise
 */
private boolean sendRequestAsDocument(int textSize) {
    //use smaller limit for the tests (e.g. 200)
    int limit = baseUrlTests.equals(baseUrl) ? eTranslationTextSnippetLimitTests : eTranslationTextSnippetLimit;
    return textSize >= limit;
}

private void createRedisMessageListenerAndWaitForResults(List<TranslationObj> translationObjs, String eTranslExtRef, int textSize) throws TranslationException {
boolean requestAsDocument=sendRequestAsDocument(textSize);
RedisMessageListener redisMessageListener = new RedisMessageListener(requestAsDocument);
MessageListenerAdapter redisMessageListenerAdapter = new MessageListenerAdapter(redisMessageListener);
redisMessageListenerContainer.addMessageListener(redisMessageListenerAdapter, ChannelTopic.of(eTranslExtRef));
synchronized (redisMessageListener) {
Expand Down Expand Up @@ -177,7 +194,8 @@ private void createRedisMessageListenerAndWaitForResults(List<TranslationObj> tr
throw new TranslationException(response);
}

extractTranslationsFromETranslationResponse(translationObjs, redisMessageListenerAdapter, response);
extractTranslationsFromETranslationResponse(translationObjs, redisMessageListenerAdapter, response, requestAsDocument);

}
/* unsubscribe this listener, which automatically deletes the created pub/sub channel,
* which also gets deleted if the app is stopped or anyhow broken.
Expand All @@ -186,15 +204,25 @@ private void createRedisMessageListenerAndWaitForResults(List<TranslationObj> tr
}
}


/**
 * Dispatches the eTransl response to the extraction method that matches the form of the response.
 * @param translationObjs the objects that receive the extracted translations
 * @param redisMessageListenerAdapter the listener adapter, handed over to the extraction method
 * @param response the response received for the translation request
 * @param responseAsDocument true if the response is a text document, false if it is a text snippet
 * @throws TranslationException
 */
private void extractTranslationsFromETranslationResponse(List<TranslationObj> translationObjs, MessageListenerAdapter redisMessageListenerAdapter, String response, boolean responseAsDocument) throws TranslationException {
    if(!responseAsDocument) {
        //text-snippet response, no decoding step needed
        extractTranslationsFromETranslationTextSnippetResponse(translationObjs, redisMessageListenerAdapter, response);
        return;
    }
    extractTranslationsFromETranslationTextDocumentResponse(translationObjs, redisMessageListenerAdapter, response);
}

/**
* This method extracts the translations from the eTransl html response
* This method extracts the translations from the eTransl html document response
* (the request is sent as an html base64 encoded document).
* @param translationObjs
* @param response
* @throws TranslationException
*/
/*
private void extractTranslationsFromETranslationHtmlResponse(List<TranslationObj> translationObjs, MessageListenerAdapter redisMessageListenerAdapter, String response) throws TranslationException {
private void extractTranslationsFromETranslationHtmlDocumentResponse(List<TranslationObj> translationObjs, MessageListenerAdapter redisMessageListenerAdapter, String response) throws TranslationException {
//first base64 decode
String respBase64Decoded = new String(Base64.decodeBase64(response), StandardCharsets.UTF_8);
Document jsoupDoc = Jsoup.parse(respBase64Decoded);
Expand All @@ -209,7 +237,29 @@ private void extractTranslationsFromETranslationHtmlResponse(List<TranslationObj
}
*/

private void extractTranslationsFromETranslationResponse(List<TranslationObj> translationObjs, MessageListenerAdapter redisMessageListenerAdapter, String response) throws TranslationException {
/**
 * Extracts the translations from the eTransl txt document response
 * (the request is sent as a base64 encoded txt document).
 * The response is base64 decoded, read as UTF-8 text and then processed
 * in the same way as a text-snippet response.
 * @param translationObjs
 * @param redisMessageListenerAdapter
 * @param response the base64 encoded response
 * @throws TranslationException
 */
private void extractTranslationsFromETranslationTextDocumentResponse(List<TranslationObj> translationObjs, MessageListenerAdapter redisMessageListenerAdapter, String response) throws TranslationException {
    byte[] decodedBytes = Base64.decodeBase64(response);
    String decodedResponse = new String(decodedBytes, StandardCharsets.UTF_8);
    extractTranslationsFromETranslationTextSnippetResponse(translationObjs, redisMessageListenerAdapter, decodedResponse);
}

/**
* Extracts the translations in case the text snippet is sent in the request (as a text-to-translate parameter)
* @param translationObjs
* @param redisMessageListenerAdapter
* @param response
* @throws TranslationException
*/

private void extractTranslationsFromETranslationTextSnippetResponse(List<TranslationObj> translationObjs, MessageListenerAdapter redisMessageListenerAdapter, String response) throws TranslationException {
String[] translations=response.split(markupDelimiterWithoutNewline);
if(translations.length != translationObjs.size()) {
redisMessageListenerContainer.removeMessageListener(redisMessageListenerAdapter);
Expand All @@ -219,7 +269,7 @@ private void extractTranslationsFromETranslationResponse(List<TranslationObj> tr
translationObjs.get(i).setTranslation(translations[i].strip());
}
}

/**
* Generate one eTransl html string to be sent for the translation, as a combination of all input texts.
* This way the eTransl translates it as a document.
Expand Down Expand Up @@ -252,7 +302,26 @@ private String generateJointStringForTranslation(List<TranslationObj> translatio
return translJointString.toString();
}

private String createTranslationBodyWithPlainText(String text, String sourceLang, String targetLang, String externalReference) throws JSONException {

/**
 * Creates the translation request body, either with a document or with a text snippet,
 * depending on the length of the text to translate.
 * @param text
 * @param sourceLang
 * @param targetLang
 * @param externalReference
 * @return the request body as a string
 * @throws JSONException
 */
private String createTranslationBody(String text, String sourceLang, String targetLang, String externalReference) throws JSONException {
    return sendRequestAsDocument(text.length())
            ? createTranslationBodyWithDocument(text, sourceLang, targetLang, externalReference)
            : createTranslationBodyWithTextSnippet(text, sourceLang, targetLang, externalReference);
}

/**
* Creates a request with a text-snippet to translate (no document to be sent).
* @param text
* @param sourceLang
* @param targetLang
* @param externalReference
* @return
* @throws JSONException
*/
private String createTranslationBodyWithTextSnippet(String text, String sourceLang, String targetLang, String externalReference) throws JSONException {
JSONObject jsonBody = new JSONObject().put("priority", 0)
.put("requesterCallback", getTranslatioCallbackUrl())
.put("errorCallback", getTranslationErrorCallbackUrl())
Expand All @@ -267,10 +336,10 @@ private String createTranslationBodyWithPlainText(String text, String sourceLang
.put("textToTranslate", text);

return jsonBody.toString();
}
}

/**
* This method creates the translation request body with an html document to translate.
* This method creates the translation request body with a document to translate.
* The response is sent back to the application over a specified callback URL
* (REST service).
*
Expand All @@ -281,28 +350,26 @@ private String createTranslationBodyWithPlainText(String text, String sourceLang
* @return
* @throws JSONException
* @throws UnsupportedEncodingException
*/
/*
private String createTranslationBodyAsHtmlDocument(String text, String sourceLang, String targetLang, String externalReference)
*/
private String createTranslationBodyWithDocument(String text, String sourceLang, String targetLang, String externalReference)
throws JSONException {
String base64EncodedText=Base64.encodeBase64String(text.getBytes(StandardCharsets.UTF_8));
JSONObject jsonBody = new JSONObject().put("priority", 0)
// .put("requesterCallback", callbackUrl)
// .put("errorCallback", callbackErrorUrl)
// .put("requesterCallback", getTranslatioCallbackUrl())
// .put("errorCallback", getTranslationErrorCallbackUrl())
.put("externalReference", externalReference)
.put("callerInformation", new JSONObject().put("application", credentialUsername).put("username", credentialUsername))
.put("sourceLanguage", sourceLang.toUpperCase(Locale.ENGLISH))
.put("targetLanguages", new JSONArray().put(0, targetLang.toUpperCase(Locale.ENGLISH)))
.put("domain", domain)
.put("destinations",
new JSONObject().put("httpDestinations", new JSONArray().put(0, callbackUrl)))
new JSONObject().put("httpDestinations", new JSONArray().put(0, getTranslatioCallbackUrl())))
// .put("textToTranslate", text);
.put("documentToTranslateBase64",
new JSONObject().put("content", base64EncodedText).put("format", "html")
new JSONObject().put("content", base64EncodedText).put("format", "txt")
);
return jsonBody.toString();
}
*/

private long createHttpRequest(String content) throws TranslationException, IOException {
CredentialsProvider credsProvider = new BasicCredentialsProvider();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ public class RedisMessageListener implements MessageListener {

private static final Logger LOGGER = LogManager.getLogger(RedisMessageListener.class);
private String message;
//if true, the message received will be a document (e.g. from the eTranslation), otherwise a text-snippet
private boolean messageAsDocument;

/**
 * Creates a listener for the given kind of message.
 * @param messageAsDocument if true, the message received will be a document, otherwise a text-snippet
 */
public RedisMessageListener(boolean messageAsDocument) {
    //the implicit super() call is sufficient here
    this.messageAsDocument = messageAsDocument;
}

@Override
public void onMessage(Message message, byte[] pattern) {
synchronized(this) {
Expand All @@ -26,15 +33,20 @@ public void onMessage(Message message, byte[] pattern) {
this.message=messageBody;
}
else {
/*
* the received message is treated as a json object and we need some adjustments for the escaped characters
* (this only applies if we get the translated text from the translated-text field in the eTransl callback,
* which happens if we send the text to be translated in the textToTranslate request param)
*/
//remove double quotes at the beginning and at the end of the response, from some reason they are duplicated
String messageRemDuplQuotes = messageBody.replaceAll("^\"|\"$", "");
//replace a double backslash with a single backslash
this.message = messageRemDuplQuotes.replace("\\n", "\n");
if(messageAsDocument) {
this.message = messageBody;
}
else {
/*
* the received message is treated as a json object and we need some adjustments for the escaped characters
* (this only applies if we get the translated text from the translated-text field in the eTransl callback,
* which happens if we send the text to be translated in the textToTranslate request param)
*/
//remove the double quotes at the beginning and at the end of the response, for some reason they are duplicated
String messageRemDuplQuotes = messageBody.replaceAll("^\"|\"$", "");
//replace the escaped newline (a backslash followed by the letter n) with a real newline character
this.message = messageRemDuplQuotes.replace("\\n", "\n");
}
}

//notify all threads waiting on this object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public abstract class IntegrationTestUtils {
public static final String TRANSLATION_PANGEANIC_RESPONSE_2 = "/content/pangeanic/translate/translate_pangeanic_response_2.json";

public static final String TRANSLATION_REQUEST_E_TRANSLATION = "/content/translation_request_eTranslation.json";
public static final String TRANSLATION_REQUEST_E_TRANSLATION_LONGER = "/content/translation_request_eTranslation_longer.json";

public static final String TRANSLATION_REQUEST_PANGEANIC_MULTIPLE_LANG = "/content/translation_pangeanic_multiple_languages_request.json";
public static final String TRANSLATION_PANGEANIC_REQUEST_MULTIPLE_LANG_DE = "/content/pangeanic/translate/translate_pangeanic_multiple_languages_request_DE.json";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ void translationPangeanic() throws Exception {
assertNotNull(serviceFieldValue);
}

class eTranslationSimulatorThread implements Runnable {
class eTranslationSimulatorThreadForTextSnippetTranslation implements Runnable {
private MockMvc mockMvc;
public eTranslationSimulatorThread(MockMvc mockMvc) {
public eTranslationSimulatorThreadForTextSnippetTranslation(MockMvc mockMvc) {
this.mockMvc = mockMvc;
}
@Override
Expand Down Expand Up @@ -156,10 +156,49 @@ public void run() {
}
}
}


/**
 * Runnable that posts the longer eTranslation request to the translate endpoint
 * and verifies the language, the translations and the service of the response.
 * It is meant to be run in a separate thread, while the test triggers the eTranslation callback.
 */
class eTranslationSimulatorThreadForDocumentTranslation implements Runnable {
    private final MockMvc mockMvc;

    public eTranslationSimulatorThreadForDocumentTranslation(MockMvc mockMvc) {
        this.mockMvc = mockMvc;
    }

    @Override
    public void run() {
        try {
            String requestJson = getJsonStringInput(TRANSLATION_REQUEST_E_TRANSLATION_LONGER);
            String result = mockMvc
                .perform(
                    post(BASE_URL_TRANSLATE)
                        .header(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE)
                        .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
                        .content(requestJson))
                .andExpect(status().isOk())
                .andReturn().getResponse().getContentAsString();

            assertNotNull(result);
            JSONObject json = new JSONObject(result);
            String langFieldValue = json.getString(TranslationAppConstants.LANG);
            assertEquals(LANGUAGE_EN, langFieldValue);

            /*
             * Collect the translations one by one. Wrapping the whole field into a singleton list
             * would make the checks below impossible to satisfy, since one element cannot equal
             * several different strings.
             * NOTE(review): assumes the translations field is a JSON array of strings - please confirm.
             */
            var translationsJson = json.getJSONArray(TranslationAppConstants.TRANSLATIONS);
            List<String> translations = new java.util.ArrayList<>();
            for (int i = 0; i < translationsJson.length(); i++) {
                translations.add(translationsJson.getString(i));
            }
            assertTrue(translations.contains("test first line in German, eight on caching, no cache 1!"));
            assertTrue(translations.contains("a second text in German, can be cached...."));
            assertTrue(translations.contains("a third text in German, such as this"));
            assertTrue(translations.contains("and a fourth text such as: today’s news on www.heute.at"));

            String serviceFieldValue = json.getString(TranslationAppConstants.SERVICE);
            assertNotNull(serviceFieldValue);
        } catch (Exception e) {
            //do not swallow the failure silently, keep the cause so that it shows up in the test output
            throw new IllegalStateException("The simulated document-based eTranslation request failed.", e);
        }
    }
}

/*
* eTranslation that uses a text snippet based translation
*/
@Test
void translationETranslation() throws Exception {
Thread thread = new Thread(new eTranslationSimulatorThread(mockMvc));
void translationETranslationTextSnippet() throws Exception {
Thread thread = new Thread(new eTranslationSimulatorThreadForTextSnippetTranslation(mockMvc));
thread.start();
Thread.sleep(1000);
//trigger the eTranslation callback manually
Expand All @@ -181,6 +220,31 @@ void translationETranslation() throws Exception {

}

/*
* eTranslation that uses a document based translation.
* A simulator thread posts the longer translation request, while this test posts the
* base64 encoded translations to the eTranslation callback url and then waits for the thread to finish.
*/
@Test
void translationETranslationDocument() throws Exception {
Thread thread = new Thread(new eTranslationSimulatorThreadForDocumentTranslation(mockMvc));
thread.start();
//presumably gives the simulator thread time to send its request before the callback is triggered - TODO confirm
Thread.sleep(1000);
//trigger the eTranslation callback manually
//computed in advance using the code in the eTransl service
String eTranslRef="et:deenC+N14w";
//base64 encoded translations
String translatedText="dGVzdCBmaXJzdCBsaW5lIGluIEdlcm1hbiwgZWlnaHQgb24gY2FjaGluZywgbm8gY2FjaGUgMSEKZGVlblBWc2FPZwphIHNlY29uZCB0ZXh0IGluIEdlcm1hbiwgY2FuIGJlIGNhY2hlZC4uLi4KZGVlblBWc2FPZwphIHRoaXJkIHRleHQgaW4gR2VybWFuLCBzdWNoIGFzIHRoaXMKZGVlblBWc2FPZwphbmQgYSBmb3VydGggdGV4dCBzdWNoIGFzOiB0b2RheeKAmXMgbmV3cyBvbiB3d3cuaGV1dGUuYXQ=";

//post the encoded translations to the callback url, using the external reference as a request parameter
mockMvc
.perform(
post(ETranslationTranslationService.eTranslationCallbackRelativeUrl).characterEncoding(StandardCharsets.UTF_8)
.param("external-reference", eTranslRef)
.content(translatedText))
.andExpect(status().isOk());

//wait until the simulator thread has finished
thread.join();

}

@Test
void translationPangeanicNoSrcMultipleLanguages() throws Exception {
String requestJson = getJsonStringInput(TRANSLATION_REQUEST_PANGEANIC_MULTIPLE_LANG);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"source": "de",
"target": "en",
"service": "ETRANSLATION",
"text": [
"erste Textzeile auf Deutsch test, achte auf caching, no cache 1!",
"einen zweiten Text auf Deutsch, kann gecached werden....",
"einen dritten Text auf Deutsch, wie zum Beispiel diesen",
"und einen vierten Text wie: heutige Nachrichten auf www.heute.at"
],
"caching":false
}
Loading

0 comments on commit 984ae09

Please sign in to comment.