Fix "null" appearing in the transliteration of some mixed-case Armenian words (letter combinations with ու "u" and ո "vo")

This commit is contained in:
Alik Aslanyan 2024-11-09 19:56:53 +04:00 committed by Arjan Schrijver
parent 1ca387df7a
commit 3b58965b0d
2 changed files with 120 additions and 103 deletions

View File

@ -29,6 +29,7 @@ import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
public class ArmenianTransliterator implements Transliterator {
// Transliteration map ordered by priority
@ -63,6 +64,7 @@ public class ArmenianTransliterator implements Transliterator {
put("յ","y");
put("ն","n");
put("շ","sh");
put("ո", "vo");
put("չ","ch");
put("պ","p");
put("ջ","j");
@ -77,68 +79,78 @@ public class ArmenianTransliterator implements Transliterator {
put("օ","o");
put("և","ev");
put("ֆ","f");
put("՝", "`");
put("՞", "?");
put("։", ":");
put("", ".");
}
};
// Capitalize existing simple substitutions here
for (final Entry<String, String> entry : new ArrayList<Entry<String, String>>(simpleSubstitions.entrySet())) {
String capitalKey = entry.getKey().toUpperCase();
if (!capitalKey.equals(entry.getKey())) {
simpleSubstitions.put(capitalKey, entry.getValue().toUpperCase());
}
}
// Letter + 'ու'
char[] letterMapU = {
'ա',
'բ',
'գ',
'դ',
'ե',
'զ',
'է',
'ը',
'թ',
'ժ',
'ի',
'լ',
'խ',
'ծ',
'կ',
'հ',
'ձ',
'ղ',
'ճ',
'մ',
'յ',
'ն',
'շ',
'չ',
'պ',
'ջ',
'ռ',
'ս',
'վ',
'տ',
'ր',
'ց',
'փ',
'ք',
'օ',
'և',
'ֆ',
'ո',
final String[] letterMapU = {
"ա",
"բ",
"գ",
"դ",
"ե",
"զ",
"է",
"ը",
"թ",
"ժ",
"ի",
"լ",
"խ",
"ծ",
"կ",
"հ",
"ձ",
"ղ",
"ճ",
"մ",
"յ",
"ն",
"շ",
"չ",
"պ",
"ջ",
"ռ",
"ս",
"վ",
"տ",
"ր",
"ց",
"փ",
"ք",
"օ",
"և",
"ֆ",
"ո"
};
for(char letter : letterMapU) {
char capitalLetter = Character.toUpperCase(letter);
final String transliteratedLetter = simpleSubstitions.get(Character.toString(letter));
final String transliteratedCapitalLetter = simpleSubstitions.get(Character.toString(capitalLetter));
for (final String letter : letterMapU) {
final String capitalLetter = letter.toUpperCase();
final String transliteratedLetter = Objects.requireNonNull(simpleSubstitions.get(letter), letter);
final String transliteratedCapitalLetter = Objects.requireNonNull(simpleSubstitions.get(capitalLetter), capitalLetter);
put(Character.toString(letter) + "ու", transliteratedLetter + "u");
put(Character.toString(capitalLetter) + "ու", transliteratedCapitalLetter + "u");
put(letter + "ու", transliteratedLetter + "u");
put(capitalLetter + "ու", transliteratedCapitalLetter + "u");
put(Character.toString(letter) + "ՈՒ", transliteratedLetter + "U");
put(Character.toString(capitalLetter) + "ՈՒ", transliteratedCapitalLetter + "U");
put(letter + "ՈՒ", transliteratedLetter + "U");
put(capitalLetter + "ՈՒ", transliteratedCapitalLetter + "U");
put(letter + "Ու", transliteratedLetter + "U");
put(capitalLetter + "Ու", transliteratedCapitalLetter + "U");
put(Character.toString(letter) + "Ու", transliteratedLetter + "U");
put(Character.toString(capitalLetter) + "Ու", transliteratedCapitalLetter + "U");
put(Character.toString(letter) + "ոՒ", transliteratedLetter + "U");
put(Character.toString(capitalLetter) + "ոՒ", transliteratedCapitalLetter + "U");
put(letter + "ոՒ", transliteratedLetter + "U");
put(capitalLetter + "ոՒ", transliteratedCapitalLetter + "U");
}
put("ու","u");
@ -147,50 +159,51 @@ public class ArmenianTransliterator implements Transliterator {
put("ՈՒ","U");
// Letter + 'ո'
char[] letterMapVo = {
'բ',
'գ',
'դ',
'զ',
'թ',
'ժ',
'լ',
'խ',
'ծ',
'կ',
'հ',
'ձ',
'ղ',
'ճ',
'մ',
'յ',
'ն',
'շ',
'չ',
'պ',
'ջ',
'ռ',
'ս',
'վ',
'տ',
'ր',
'ց',
'փ',
'ք',
'և',
'ֆ',
final String[] letterMapVo = {
"բ",
"գ",
"դ",
"զ",
"թ",
"ժ",
"լ",
"խ",
"ծ",
"կ",
"հ",
"ձ",
"ղ",
"ճ",
"մ",
"յ",
"ն",
"շ",
"ո", // ո + ո should be voo
"չ",
"պ",
"ջ",
"ռ",
"ս",
"վ",
"տ",
"ր",
"ց",
"փ",
"ք",
"և",
"ֆ"
};
for(char letter : letterMapVo) {
char capitalLetter = Character.toUpperCase(letter);
final String transliteratedLetter = simpleSubstitions.get(Character.toString(letter));
final String transliteratedCapitalLetter = simpleSubstitions.get(Character.toString(capitalLetter));
for (String letter : letterMapVo) {
String capitalLetter = letter.toUpperCase();
final String transliteratedLetter = Objects.requireNonNull(simpleSubstitions.get(letter));
final String transliteratedCapitalLetter = Objects.requireNonNull(simpleSubstitions.get(capitalLetter));
put(Character.toString(letter) + "ո", transliteratedLetter + "o");
put(Character.toString(capitalLetter) + "ո", transliteratedCapitalLetter + "o");
put(letter + "ո", transliteratedLetter + "o");
put(capitalLetter + "ո", transliteratedCapitalLetter + "o");
put(Character.toString(letter) + "Ո", transliteratedLetter + "Օ");
put(Character.toString(capitalLetter) + "Ո", transliteratedCapitalLetter + "Օ");
put(letter + "Ո", transliteratedLetter + "Օ");
put(capitalLetter + "Ո", transliteratedCapitalLetter + "Օ");
}
put("ո","vo");
@ -213,7 +226,6 @@ public class ArmenianTransliterator implements Transliterator {
put(entry.getKey(), entry.getValue());
put(entry.getKey().toUpperCase(), entry.getValue().toUpperCase());
}
}};
private static final Map<String, Integer> transliterationPriorityMap = new HashMap<String, Integer>() {{
@ -271,7 +283,7 @@ public class ArmenianTransliterator implements Transliterator {
sb.append(text.substring(prevIndex, matchIndex));
// Substitute and append to the builder
sb.append( ArmenianTransliterator.transliterateMap.get( emit.getKeyword() ) );
sb.append(Objects.requireNonNull(ArmenianTransliterator.transliterateMap.get(emit.getKeyword())));
prevIndex = emit.getEnd() + 1;
}

View File

@ -18,6 +18,9 @@ public class ArmenianTransliteratorTest extends TestCase {
Assert.assertEquals("vorotan", new ArmenianTransliterator().transliterate("որոտան"));
Assert.assertEquals("voroshel", new ArmenianTransliterator().transliterate("որոշել"));
Assert.assertEquals("uzox", new ArmenianTransliterator().transliterate("ուզող"));
Assert.assertEquals(
"AVO", new ArmenianTransliterator().transliterate("ԱՈ")
);
}
@Test
@ -44,9 +47,11 @@ public class ArmenianTransliteratorTest extends TestCase {
Assert.assertEquals(
"Ushadir", new ArmenianTransliterator().transliterate("Ուշադիր")
);
Assert.assertEquals(
"AU", new ArmenianTransliterator().transliterate("ԱՈւ")
);
}
@Test
public void testTop100Words() {
final Map<String,String> topWords = new LinkedHashMap<String,String>() {{