2020-01-09 10:44:32 +01:00
|
|
|
|
/* Copyright (C) 2017-2020 Aniruddha Adhikary, Carsten Pfeiffer, Daniele
   Gobbetti, Utsob Roy

   This file is part of Gadgetbridge.

   Gadgetbridge is free software: you can redistribute it and/or modify
   it under the terms of the GNU Affero General Public License as published
   by the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Gadgetbridge is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU Affero General Public License for more details.

   You should have received a copy of the GNU Affero General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
2017-11-22 18:57:05 +01:00
|
|
|
|
package nodomain.freeyourgadget.gadgetbridge.util;
|
|
|
|
|
|
|
|
|
|
import java.util.HashMap;
|
2019-01-26 15:52:40 +01:00
|
|
|
|
import java.util.regex.Matcher;
|
|
|
|
|
import java.util.regex.Pattern;
|
2017-11-22 18:57:05 +01:00
|
|
|
|
|
2018-08-24 16:57:38 +02:00
|
|
|
|
// What is the reason for extending LanguageUtils?
// It is only done because the previous code already did so.
|
2017-11-22 18:57:05 +01:00
|
|
|
|
public class BengaliLanguageUtils extends LanguageUtils {
|
2018-10-01 23:04:51 +02:00
|
|
|
|
// Composite Letters.
|
2018-08-31 20:38:15 +02:00
|
|
|
|
private final static HashMap<String, String> composites = new HashMap<String, String>() {
|
2017-11-22 18:57:05 +01:00
|
|
|
|
{
|
2018-08-24 10:42:27 +02:00
|
|
|
|
put("ক্ষ", "kkh");
|
|
|
|
|
put("ঞ্চ", "NC");
|
|
|
|
|
put("ঞ্ছ", "NCh");
|
|
|
|
|
put("ঞ্জ", "Ng");
|
|
|
|
|
put("জ্ঞ", "gg");
|
|
|
|
|
put("ঞ্ঝ", "Ngh");
|
|
|
|
|
put("্র", "r");
|
|
|
|
|
put("্ল", "l");
|
|
|
|
|
put("ষ্ম", "SSh");
|
|
|
|
|
put("র্", "r");
|
|
|
|
|
put("্য", "y");
|
|
|
|
|
put("্ব", "w");
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
|
|
|
|
};
|
2018-10-01 23:04:51 +02:00
|
|
|
|
|
2018-08-24 18:33:25 +02:00
|
|
|
|
// Vowels Only
|
2018-10-01 23:04:51 +02:00
|
|
|
|
private final static HashMap<String, String> vowels = new HashMap<String, String>() {
|
|
|
|
|
{
|
|
|
|
|
put("আ", "aa");
|
|
|
|
|
put("অ", "a");
|
|
|
|
|
put("ই", "i");
|
|
|
|
|
put("ঈ", "ii");
|
|
|
|
|
put("উ", "u");
|
|
|
|
|
put("ঊ", "uu");
|
|
|
|
|
put("ঋ", "ri");
|
|
|
|
|
put("এ", "e");
|
|
|
|
|
put("ঐ", "oi");
|
|
|
|
|
put("ও", "o");
|
|
|
|
|
put("ঔ", "ou");
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Vowels and Hasants
|
2018-08-31 20:38:15 +02:00
|
|
|
|
private final static HashMap<String, String> vowelsAndHasants = new HashMap<String, String>() {
|
2017-11-22 18:57:05 +01:00
|
|
|
|
{
|
2018-08-24 18:33:25 +02:00
|
|
|
|
put("আ", "aa");
|
|
|
|
|
put("অ", "a");
|
|
|
|
|
put("ই", "i");
|
|
|
|
|
put("ঈ", "ii");
|
|
|
|
|
put("উ", "u");
|
|
|
|
|
put("ঊ", "uu");
|
|
|
|
|
put("ঋ", "ri");
|
|
|
|
|
put("এ", "e");
|
|
|
|
|
put("ঐ", "oi");
|
|
|
|
|
put("ও", "o");
|
|
|
|
|
put("ঔ", "ou");
|
|
|
|
|
put("া", "aa");
|
|
|
|
|
put("ি", "i");
|
|
|
|
|
put("ী", "ii");
|
|
|
|
|
put("ু", "u");
|
|
|
|
|
put("ূ", "uu");
|
|
|
|
|
put("ৃ", "r");
|
|
|
|
|
put("ে", "e");
|
|
|
|
|
put("ো", "o");
|
|
|
|
|
put("ৈ", "oi");
|
|
|
|
|
put("ৗ", "ou");
|
|
|
|
|
put("ৌ", "ou");
|
|
|
|
|
put("ং", "ng");
|
|
|
|
|
put("ঃ", "h");
|
|
|
|
|
put("।", ".");
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2018-08-24 16:57:38 +02:00
|
|
|
|
// Single Character Letters.
|
2018-08-31 20:38:15 +02:00
|
|
|
|
private final static HashMap<String, String> letters = new HashMap<String, String>() {
|
2017-11-22 18:57:05 +01:00
|
|
|
|
{
|
2018-08-24 10:42:27 +02:00
|
|
|
|
put("আ", "aa");
|
|
|
|
|
put("অ", "a");
|
|
|
|
|
put("ই", "i");
|
|
|
|
|
put("ঈ", "ii");
|
|
|
|
|
put("উ", "u");
|
|
|
|
|
put("ঊ", "uu");
|
|
|
|
|
put("ঋ", "ri");
|
|
|
|
|
put("এ", "e");
|
|
|
|
|
put("ঐ", "oi");
|
|
|
|
|
put("ও", "o");
|
|
|
|
|
put("ঔ", "ou");
|
|
|
|
|
put("ক", "k");
|
|
|
|
|
put("খ", "kh");
|
|
|
|
|
put("গ", "g");
|
|
|
|
|
put("ঘ", "gh");
|
|
|
|
|
put("ঙ", "ng");
|
|
|
|
|
put("চ", "ch");
|
|
|
|
|
put("ছ", "chh");
|
|
|
|
|
put("জ", "j");
|
|
|
|
|
put("ঝ", "jh");
|
|
|
|
|
put("ঞ", "Ng");
|
|
|
|
|
put("ট", "T");
|
|
|
|
|
put("ঠ", "Th");
|
|
|
|
|
put("ড", "D");
|
|
|
|
|
put("ঢ", "Dh");
|
|
|
|
|
put("ণ", "N");
|
|
|
|
|
put("ত", "t");
|
|
|
|
|
put("থ", "th");
|
|
|
|
|
put("দ", "d");
|
|
|
|
|
put("ধ", "dh");
|
|
|
|
|
put("ন", "n");
|
|
|
|
|
put("প", "p");
|
|
|
|
|
put("ফ", "ph");
|
|
|
|
|
put("ব", "b");
|
|
|
|
|
put("ভ", "bh");
|
|
|
|
|
put("ম", "m");
|
|
|
|
|
put("য", "J");
|
|
|
|
|
put("র", "r");
|
|
|
|
|
put("ল", "l");
|
|
|
|
|
put("শ", "sh");
|
|
|
|
|
put("ষ", "Sh");
|
|
|
|
|
put("স", "s");
|
|
|
|
|
put("হ", "h");
|
|
|
|
|
put("ড়", "rh");
|
|
|
|
|
put("ঢ়", "rH");
|
|
|
|
|
put("য়", "y");
|
|
|
|
|
put("ৎ", "t");
|
|
|
|
|
put("০", "0");
|
|
|
|
|
put("১", "1");
|
|
|
|
|
put("২", "2");
|
|
|
|
|
put("৩", "3");
|
|
|
|
|
put("৪", "4");
|
|
|
|
|
put("৫", "5");
|
|
|
|
|
put("৬", "6");
|
|
|
|
|
put("৭", "7");
|
|
|
|
|
put("৮", "8");
|
|
|
|
|
put("৯", "9");
|
|
|
|
|
put("া", "aa");
|
|
|
|
|
put("ি", "i");
|
|
|
|
|
put("ী", "ii");
|
|
|
|
|
put("ু", "u");
|
|
|
|
|
put("ূ", "uu");
|
|
|
|
|
put("ৃ", "r");
|
|
|
|
|
put("ে", "e");
|
|
|
|
|
put("ো", "o");
|
|
|
|
|
put("ৈ", "oi");
|
|
|
|
|
put("ৗ", "ou");
|
|
|
|
|
put("ৌ", "ou");
|
|
|
|
|
put("ং", "ng");
|
|
|
|
|
put("ঃ", "h");
|
|
|
|
|
put("ঁ", "nN");
|
|
|
|
|
put("।", ".");
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2018-08-24 16:57:38 +02:00
|
|
|
|
// The regex to extract Bengali characters in nested groups.
|
2018-10-01 23:04:51 +02:00
|
|
|
|
private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|([্ঁঃংৎ০-৯।])|(\\s)";
|
|
|
|
|
|
2018-08-31 20:38:15 +02:00
|
|
|
|
private final static Pattern bengaliRegex = Pattern.compile(pattern);
|
2017-11-22 18:57:05 +01:00
|
|
|
|
|
2018-08-31 20:38:15 +02:00
|
|
|
|
private static String getVal(String key) {
|
|
|
|
|
if (key != null) {
|
2018-09-01 04:59:57 +02:00
|
|
|
|
String comp = composites.get(key);
|
|
|
|
|
if (comp != null) {
|
|
|
|
|
return comp;
|
2018-08-31 20:38:15 +02:00
|
|
|
|
}
|
2018-09-01 04:59:57 +02:00
|
|
|
|
String sl = letters.get(key);
|
|
|
|
|
if (sl != null) {
|
2018-08-31 20:38:15 +02:00
|
|
|
|
return letters.get(key);
|
|
|
|
|
}
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
2018-08-31 20:38:15 +02:00
|
|
|
|
return null;
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String transliterate(String txt) {
|
|
|
|
|
if (txt.isEmpty()) {
|
|
|
|
|
return txt;
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-31 20:38:15 +02:00
|
|
|
|
Matcher m = bengaliRegex.matcher(txt);
|
2018-08-24 10:42:27 +02:00
|
|
|
|
StringBuffer sb = new StringBuffer();
|
2018-10-01 23:04:51 +02:00
|
|
|
|
String lastChar = "";
|
|
|
|
|
boolean lastHadComposition = false;
|
|
|
|
|
boolean lastHadKaar = false;
|
|
|
|
|
boolean nextNeedsO = false;
|
|
|
|
|
int lastHadO = 0;
|
2018-08-24 10:42:27 +02:00
|
|
|
|
while (m.find()) {
|
2018-10-01 23:04:51 +02:00
|
|
|
|
boolean thisNeedsO = false;
|
|
|
|
|
boolean changePronounciation = false;
|
|
|
|
|
boolean thisHadKaar = false;
|
2018-08-24 10:42:27 +02:00
|
|
|
|
String appendableString = "";
|
|
|
|
|
String reff = m.group(1);
|
|
|
|
|
if (reff != null) {
|
|
|
|
|
appendableString = appendableString + "rr";
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
2018-08-24 16:57:38 +02:00
|
|
|
|
// This is a filter-down approach. First considering larger groups,
|
|
|
|
|
// If found any match breaks their. Else go to the next step.
|
|
|
|
|
// Helpful to solve some corner-cases.
|
2018-08-31 20:38:15 +02:00
|
|
|
|
String mainPart = getVal(m.group(2));
|
|
|
|
|
if (mainPart != null) {
|
|
|
|
|
appendableString = appendableString + mainPart;
|
|
|
|
|
} else {
|
|
|
|
|
String firstPart = getVal(m.group(3));
|
|
|
|
|
if (firstPart != null) {
|
|
|
|
|
appendableString = appendableString + firstPart;
|
|
|
|
|
}
|
|
|
|
|
int g = 4;
|
|
|
|
|
while (g < 6) {
|
|
|
|
|
String part = getVal(m.group(g));
|
|
|
|
|
if (part != null) {
|
|
|
|
|
appendableString = appendableString + part;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
g = g + 1;
|
|
|
|
|
}
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
2018-10-01 23:04:51 +02:00
|
|
|
|
if (m.group(2) != null && m.group(2).equals("ক্ষ")) {
|
|
|
|
|
changePronounciation = true;
|
|
|
|
|
thisNeedsO = true;
|
|
|
|
|
}
|
2018-08-24 18:33:25 +02:00
|
|
|
|
int g = 6;
|
|
|
|
|
while (g < 10) {
|
|
|
|
|
String key = getVal(m.group(g));
|
2018-08-31 20:38:15 +02:00
|
|
|
|
if (key != null) {
|
|
|
|
|
appendableString = appendableString + key;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
g = g + 1;
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
2018-10-01 23:04:51 +02:00
|
|
|
|
String phala = m.group(8);
|
|
|
|
|
if (phala != null && phala.equals("্য")) {
|
|
|
|
|
changePronounciation = true;
|
|
|
|
|
thisNeedsO = true;
|
|
|
|
|
}
|
|
|
|
|
String jukto = m.group(4);
|
|
|
|
|
if (jukto != null) {
|
|
|
|
|
thisNeedsO = true;
|
|
|
|
|
}
|
2018-08-24 18:33:25 +02:00
|
|
|
|
String kaar = m.group(10);
|
2018-08-24 10:42:27 +02:00
|
|
|
|
if (kaar != null) {
|
2018-09-01 04:59:57 +02:00
|
|
|
|
String kaarStr = letters.get(kaar);
|
|
|
|
|
if (kaarStr != null) {
|
|
|
|
|
appendableString = appendableString + kaarStr;
|
2018-11-01 23:16:14 +01:00
|
|
|
|
if (kaarStr.equals("i") || kaarStr.equals("ii") || kaarStr.equals("u") || kaarStr.equals("uu")) {
|
|
|
|
|
changePronounciation = true;
|
|
|
|
|
}
|
2018-10-01 23:04:51 +02:00
|
|
|
|
}
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
2018-08-31 20:38:15 +02:00
|
|
|
|
String singleton = m.group(11);
|
|
|
|
|
if (singleton != null) {
|
2018-09-01 04:59:57 +02:00
|
|
|
|
String singleStr = letters.get(singleton);
|
|
|
|
|
if (singleStr != null) {
|
|
|
|
|
appendableString = appendableString + singleStr;
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
2018-10-01 23:04:51 +02:00
|
|
|
|
if (changePronounciation && lastChar.equals("a")) {
|
|
|
|
|
sb.setCharAt(sb.length() - 1, 'o');
|
|
|
|
|
}
|
2018-08-24 10:42:27 +02:00
|
|
|
|
String others = m.group(0);
|
|
|
|
|
if (others != null) {
|
2017-11-22 18:57:05 +01:00
|
|
|
|
|
2018-08-24 10:42:27 +02:00
|
|
|
|
if (appendableString.length() <= 0) {
|
|
|
|
|
appendableString = appendableString + others;
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-10-01 23:04:51 +02:00
|
|
|
|
String whitespace = m.group(12);
|
|
|
|
|
if (nextNeedsO && kaar == null && whitespace == null && !vowels.containsKey(m.group(0))) {
|
|
|
|
|
appendableString = appendableString + "o";
|
|
|
|
|
lastHadO++;
|
|
|
|
|
thisNeedsO = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (((kaar != null && lastHadO > 1) || whitespace != null) && !lastHadKaar && sb.length() > 0
|
|
|
|
|
&& sb.charAt(sb.length() - 1) == 'o' && !lastHadComposition) {
|
|
|
|
|
sb.deleteCharAt(sb.length() - 1);
|
|
|
|
|
lastHadO = 0;
|
|
|
|
|
}
|
|
|
|
|
nextNeedsO = false;
|
|
|
|
|
if (thisNeedsO && kaar == null && whitespace == null && !vowels.containsKey(m.group(0))) {
|
|
|
|
|
appendableString = appendableString + "o";
|
|
|
|
|
lastHadO++;
|
|
|
|
|
}
|
|
|
|
|
if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0)) && kaar == null) {
|
|
|
|
|
nextNeedsO = true;
|
|
|
|
|
}
|
|
|
|
|
if (reff != null || m.group(4) != null || m.group(6) != null) {
|
|
|
|
|
lastHadComposition = true;
|
|
|
|
|
} else {
|
|
|
|
|
lastHadComposition = false;
|
|
|
|
|
}
|
|
|
|
|
if (kaar != null) {
|
|
|
|
|
lastHadKaar = true;
|
|
|
|
|
} else {
|
|
|
|
|
lastHadKaar = false;
|
|
|
|
|
}
|
2018-08-24 10:42:27 +02:00
|
|
|
|
m.appendReplacement(sb, appendableString);
|
2018-10-01 23:04:51 +02:00
|
|
|
|
lastChar = appendableString;
|
|
|
|
|
}
|
|
|
|
|
if (!lastHadKaar && sb.length() > 0 && sb.charAt(sb.length() - 1) == 'o' && !lastHadComposition) {
|
|
|
|
|
sb.deleteCharAt(sb.length() - 1);
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
2018-08-24 10:42:27 +02:00
|
|
|
|
m.appendTail(sb);
|
|
|
|
|
return sb.toString();
|
2017-11-22 18:57:05 +01:00
|
|
|
|
}
|
2018-08-24 10:42:27 +02:00
|
|
|
|
}
|