Gadgetbridge/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java

227 lines
7.4 KiB
Java
Raw Normal View History

2017-11-22 18:57:05 +01:00
package nodomain.freeyourgadget.gadgetbridge.util;
import java.util.HashMap;
import java.util.regex.*;
2017-11-22 18:57:05 +01:00
2018-08-24 16:57:38 +02:00
// What is the reason for extending LanguageUtils?
// It is done only because the previous code already did so.
2017-11-22 18:57:05 +01:00
public class BengaliLanguageUtils extends LanguageUtils {
2018-08-24 16:57:38 +02:00
// Composite letters: multi-character Bengali keys (each contains the joiner
// sign "্") mapped to their Latin spelling.
// getVal() consults this table before the single-character table, so an entry
// here wins whenever the same key exists in both.
// The map is populated through an anonymous HashMap subclass with an instance
// initializer and is never modified elsewhere in this class.
private final static HashMap<String, String> composites = new HashMap<String, String>() {
2017-11-22 18:57:05 +01:00
{
put("ক্ষ", "kkh");
put("ঞ্চ", "NC");
put("ঞ্ছ", "NCh");
put("ঞ্জ", "Ng");
put("জ্ঞ", "gg");
put("ঞ্ঝ", "Ngh");
put("্র", "r");
put("্ল", "l");
put("ষ্ম", "SSh");
put("র্", "r");
put("্য", "y");
put("্ব", "w");
2017-11-22 18:57:05 +01:00
}
};
// Vowels only.
// transliterate() uses this table purely as a key set: when the whole regex
// match is a key here, the implicit trailing "a" is not appended. The mapped
// values are not read anywhere in this class.
// NOTE(review): almost every key below is the empty string in this copy of the
// file, so the puts overwrite one another; presumably the Bengali key
// characters were lost in extraction - confirm against the upstream source.
private final static HashMap<String, String> vowelsAndHasants = new HashMap<String, String>() {
{
put("", "aa");
put("", "a");
put("", "i");
put("", "ii");
put("", "u");
put("", "uu");
put("", "ri");
put("", "e");
put("", "oi");
put("", "o");
put("", "ou");
put("", "aa");
put("ি", "i");
put("", "ii");
put("", "u");
put("", "uu");
put("", "r");
put("", "e");
put("", "o");
put("", "oi");
put("", "ou");
put("", "ou");
put("", "ng");
put("", "h");
put("", ".");
}
};
2018-08-24 16:57:38 +02:00
// Single-character letters mapped to their Latin spelling. Judging by the
// values, the entries cover vowels, consonants, the digits 0-9, vowel signs
// and a sentence-ending mark (the keys themselves are not visible here).
// getVal() falls back to this table after the composites table, and
// transliterate() reads it directly for regex groups 10 and 11.
// NOTE(review): almost every key below is the empty string in this copy of the
// file, so the puts overwrite one another; presumably the Bengali key
// characters were lost in extraction - confirm against the upstream source.
private final static HashMap<String, String> letters = new HashMap<String, String>() {
2017-11-22 18:57:05 +01:00
{
put("", "aa");
put("", "a");
put("", "i");
put("", "ii");
put("", "u");
put("", "uu");
put("", "ri");
put("", "e");
put("", "oi");
put("", "o");
put("", "ou");
put("", "k");
put("", "kh");
put("", "g");
put("", "gh");
put("", "ng");
put("", "ch");
put("", "chh");
put("", "j");
put("", "jh");
put("", "Ng");
put("", "T");
put("", "Th");
put("", "D");
put("", "Dh");
put("", "N");
put("", "t");
put("", "th");
put("", "d");
put("", "dh");
put("", "n");
put("", "p");
put("", "ph");
put("", "b");
put("", "bh");
put("", "m");
put("", "J");
put("", "r");
put("", "l");
put("", "sh");
put("", "Sh");
put("", "s");
put("", "h");
put("", "rh");
put("", "rH");
put("", "y");
put("", "t");
put("", "0");
put("", "1");
put("", "2");
put("", "3");
put("", "4");
put("", "5");
put("", "6");
put("", "7");
put("", "8");
put("", "9");
put("", "aa");
put("ি", "i");
put("", "ii");
put("", "u");
put("", "uu");
put("", "r");
put("", "e");
put("", "o");
put("", "oi");
put("", "ou");
put("", "ou");
put("", "ng");
put("", "h");
put("", "nN");
put("", ".");
2017-11-22 18:57:05 +01:00
}
};
2018-08-24 16:57:38 +02:00
// The regex to extract Bengali characters in nested groups.
// A match is one of three alternatives: a letter-based cluster (groups 1-10),
// a single standalone character (group 11), or a plain space (no group).
// Capturing groups, as consumed by transliterate():
//   1     optional leading "র্" prefix
//   2     the whole cluster: a base letter followed by any number of
//         joiner-plus-letter pairs
//   3     the base letter of the cluster
//   4, 5  the last joiner-plus-letter pair of the cluster, and the letter in it
//   6     optional trailing part made of group 7 followed by group 8
//   7     optional group whose body is empty in this copy of the file
//         NOTE(review): presumably an invisible character was lost here -
//         confirm against the upstream source
//   8, 9  a joiner followed by one letter from the range in the innermost
//         brackets, and that letter
//   10    optional vowel sign
//   11    one standalone sign, digit or sentence-ending mark
private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|([্ঁঃংৎ০-৯।])| ";
// Compiled once and shared by every transliterate() call.
private final static Pattern bengaliRegex = Pattern.compile(pattern);
2017-11-22 18:57:05 +01:00
/**
 * Resolves a Bengali fragment to its Latin spelling.
 *
 * The composite table is searched first and the single-letter table second,
 * so a composite entry takes precedence over a letter entry with the same key.
 *
 * @param key the fragment to look up; may be null
 * @return the mapped spelling, or null when key is null or unknown to both tables
 */
private static String getVal(String key) {
    if (key == null) {
        return null;
    }
    final String fromComposites = composites.get(key);
    // Neither table stores null values, so a null result means "no such key".
    return fromComposites != null ? fromComposites : letters.get(key);
}
2017-11-22 18:57:05 +01:00
public static String transliterate(String txt) {
if (txt.isEmpty()) {
return txt;
}
Matcher m = bengaliRegex.matcher(txt);
StringBuffer sb = new StringBuffer();
while (m.find()) {
String appendableString = "";
String reff = m.group(1);
if (reff != null) {
appendableString = appendableString + "rr";
2017-11-22 18:57:05 +01:00
}
2018-08-24 16:57:38 +02:00
// This is a filter-down approach. First considering larger groups,
// If found any match breaks their. Else go to the next step.
// Helpful to solve some corner-cases.
String mainPart = getVal(m.group(2));
if (mainPart != null) {
appendableString = appendableString + mainPart;
} else {
String firstPart = getVal(m.group(3));
if (firstPart != null) {
appendableString = appendableString + firstPart;
}
int g = 4;
while (g < 6) {
String part = getVal(m.group(g));
if (part != null) {
appendableString = appendableString + part;
break;
}
g = g + 1;
}
}
int g = 6;
while (g < 10) {
String key = getVal(m.group(g));
if (key != null) {
appendableString = appendableString + key;
break;
}
g = g + 1;
}
String kaar = m.group(10);
if (kaar != null) {
boolean hasKey = letters.containsKey(kaar);
if (hasKey) {
appendableString = appendableString + letters.get(kaar);
2017-11-22 18:57:05 +01:00
}
} else if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0))) {
2018-08-24 16:57:38 +02:00
// Adding 'a' like ITRANS if no vowel is present.
// TODO: Have to add it dynamically using Bengali grammer rules.
appendableString = appendableString + "a";
2017-11-22 18:57:05 +01:00
}
String singleton = m.group(11);
if (singleton != null) {
boolean hasKeyS = letters.containsKey(singleton);
if (hasKeyS) {
appendableString = appendableString + letters.get(singleton);
}
}
String others = m.group(0);
if (others != null) {
2017-11-22 18:57:05 +01:00
if (appendableString.length() <= 0) {
appendableString = appendableString + others;
}
}
m.appendReplacement(sb, appendableString);
2017-11-22 18:57:05 +01:00
}
m.appendTail(sb);
return sb.toString();
2017-11-22 18:57:05 +01:00
}
}