Gadgetbridge/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/LanguageUtils.java
Roi e35240c91e fix bugs.
Use getDirectionality instead of serchinf areas
2018-09-04 01:10:54 +03:00

294 lines
12 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Copyright (C) 2017-2018 Andreas Shimokawa, Daniele Gobbetti, ivanovlev,
lazarosfs, McSym28, Ted Stein, Yaron Shahrabani
This file is part of Gadgetbridge.
Gadgetbridge is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Gadgetbridge is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
package nodomain.freeyourgadget.gadgetbridge.util;
import android.text.TextUtils;
import android.util.Log;
import org.apache.commons.lang3.text.WordUtils;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import nodomain.freeyourgadget.gadgetbridge.GBApplication;
public class LanguageUtils {
/**
* Checks the status of right-to-left option
* @return true if right-to-left option is On, and false, if Off or not exist
*/
public static boolean rtlSupport()
{
return GBApplication.getPrefs().getBoolean("rtl", false);
}
//transliteration map with english equivalent for unsupported chars
private static Map<Character, String> transliterateMap = new HashMap<Character, String>(){
{
//extended ASCII characters
put('œ', "oe"); put('ª', "a"); put('º', "o"); put('«',"\""); put('»',"\"");
// Scandinavian characters
put('Æ',"Ae"); put('æ',"ae");
put('Ø',"Oe"); put('ø',"oe");
put('Å',"Aa"); put('å',"aa");
//german characters
put('ä',"ae"); put('ö',"oe"); put('ü',"ue");
put('Ä',"Ae"); put('Ö',"Oe"); put('Ü',"Üe");
put('ß',"ss"); put('ẞ',"SS");
//russian chars
put('а', "a"); put('б', "b"); put('в', "v"); put('г', "g"); put('д', "d"); put('е', "e"); put('ё', "jo"); put('ж', "zh");
put('з', "z"); put('и', "i"); put('й', "jj"); put('к', "k"); put('л', "l"); put('м', "m"); put('н', "n"); put('о', "o");
put('п', "p"); put('р', "r"); put('с', "s"); put('т', "t"); put('у', "u"); put('ф', "f"); put('х', "kh"); put('ц', "c");
put('ч', "ch");put('ш', "sh");put('щ', "shh");put('ъ', "\"");put('ы', "y"); put('ь', "'"); put('э', "eh"); put('ю', "ju");
put('я', "ja");
//hebrew chars
put('א', "a"); put('ב', "b"); put('ג', "g"); put('ד', "d"); put('ה', "h"); put('ו', "u"); put('ז', "z"); put('ח', "kh");
put('ט', "t"); put('י', "y"); put('כ', "c"); put('ל', "l"); put('מ', "m"); put('נ', "n"); put('ס', "s"); put('ע', "'");
put('פ', "p"); put('צ', "ts"); put('ק', "k"); put('ר', "r"); put('ש', "sh"); put('ת', "th"); put('ף', "f"); put('ץ', "ts");
put('ך', "ch");put('ם', "m");put('ן', "n");
// greek chars
put('α',"a");put('ά',"a");put('β',"v");put('γ',"g");put('δ',"d");put('ε',"e");put('έ',"e");put('ζ',"z");put('η',"i");
put('ή',"i");put('θ',"th");put('ι',"i");put('ί',"i");put('ϊ',"i");put('ΐ',"i");put('κ',"k");put('λ',"l");put('μ',"m");
put('ν',"n");put('ξ',"ks");put('ο',"o");put('ό',"o");put('π',"p");put('ρ',"r");put('σ',"s");put('ς',"s");put('τ',"t");
put('υ',"y");put('ύ',"y");put('ϋ',"y");put('ΰ',"y");put('φ',"f");put('χ',"ch");put('ψ',"ps");put('ω',"o");put('ώ',"o");
put('Α',"A");put('Ά',"A");put('Β',"B");put('Γ',"G");put('Δ',"D");put('Ε',"E");put('Έ',"E");put('Ζ',"Z");put('Η',"I");
put('Ή',"I");put('Θ',"TH");put('Ι',"I");put('Ί',"I");put('Ϊ',"I");put('Κ',"K");put('Λ',"L");put('Μ',"M");put('Ν',"N");
put('Ξ',"KS");put('Ο',"O");put('Ό',"O");put('Π',"P");put('Ρ',"R");put('Σ',"S");put('Τ',"T");put('Υ',"Y");put('Ύ',"Y");
put('Ϋ',"Y");put('Φ',"F");put('Χ',"CH");put('Ψ',"PS");put('Ω',"O");put('Ώ',"O");
//ukrainian characters
put('ґ', "gh"); put('є', "je"); put('і', "i"); put('ї', "ji"); put('Ґ', "GH"); put('Є', "JE"); put('І', "I"); put('Ї', "JI");
// Arabic
put('ا', "a"); put('ب', "b"); put('ت', "t"); put('ث', "th"); put('ج', "j"); put('ح', "7"); put('خ', "5");
put('د', "d"); put('ذ', "th"); put('ر', "r"); put('ز', "z"); put('س', "s"); put('ش', "sh"); put('ص', "9");
put('ض', "9'"); put('ط', "6"); put('ظ', "6'"); put('ع', "3"); put('غ', "3'"); put('ف', "f");
put('ق', "q"); put('ك', "k"); put('ل', "l"); put('م', "m"); put('ن', "n"); put('ه', "h");
put('و', "w"); put('ي', "y"); put('ى', "a"); put('ﺓ', "");
put('آ', "2"); put('ئ', "2"); put('إ', "2"); put('ؤ', "2"); put('أ', "2"); put('ء', "2");
// Farsi
put('پ', "p"); put('چ', "ch"); put('ڜ', "ch"); put('ڤ', "v"); put('ڥ', "v");
put('ڨ', "g"); put('گ', "g"); put('ݣ', "g");
// Polish
put('Ł', "L"); put('ł', "l");
//Lithuanian
put('ą', "a"); put('č', "c"); put('ę', "e"); put('ė', "e"); put('į', "i"); put('š', "s"); put('ų', "u"); put('ū', "u"); put('ž', "z");
//TODO: these must be configurable. If someone wants to transliterate cyrillic it does not mean his device has no German umlauts
//all or nothing is really bad here
}
};
/**
* Checks the status of transliteration option
* @return true if transliterate option is On, and false, if Off or not exist
*/
public static boolean transliterate()
{
return GBApplication.getPrefs().getBoolean("transliteration", false);
}
/**
* Replaces unsupported symbols to english
* @param txt input text
* @return transliterated text
*/
public static String transliterate(String txt){
if (txt == null || txt.isEmpty()) {
return txt;
}
StringBuilder message = new StringBuilder();
char[] chars = txt.toCharArray();
for (Character c : chars)
{
message.append(transliterate(c));
}
String messageString = BengaliLanguageUtils.transliterate(message.toString());
return flattenToAscii(messageString);
}
/**
* Replaces unsupported symbol to english by {@code transliterateMap}
* @param c input char
* @return replacement text
*/
private static String transliterate(Character c){
Character lowerChar = Character.toLowerCase(c);
if (transliterateMap.containsKey(lowerChar)) {
String replace = transliterateMap.get(lowerChar);
if (lowerChar != c)
{
return WordUtils.capitalize(replace);
}
return replace;
}
return String.valueOf(c);
}
/**
* Converts the diacritics
* @param string input text
* @return converted text
*/
private static String flattenToAscii(String string) {
string = Normalizer.normalize(string, Normalizer.Form.NFD);
return string.replaceAll("\\p{M}", "");
}
/**
* The function get a string and fix the rtl words.
* since simple reverse puts the beginning of the text at the end, the text should have been from bottom to top.
* To avoid that, we save the text in lines (line max size can be change in the settings)
* @param s - the string to fix.
* @return a fix string.
*/
public static String fixRtl(String s) {
if (s == null || s.isEmpty()){
return s;
}
Log.d("ROIGR", "before: |" + org.apache.commons.lang3.StringEscapeUtils.escapeJava(s) + "|");
int length = s.length();
String oldString = s.substring(0, length);
String newString = "";
List<String> lines = new ArrayList<>();
char[] newWord = new char[length];
int line_max_size = GBApplication.getPrefs().getInt("rtl_max_line_length", 20);;
int startPos = 0;
int endPos = 0;
RtlUtils.characterType CurRtlType = RtlUtils.isRtl(oldString.charAt(0))? RtlUtils.characterType.rtl : RtlUtils.characterType.ltr;
RtlUtils.characterType PhraseRtlType = CurRtlType;
Character c;
// String word = "", phrase = "", line = "";
StringBuilder word = new StringBuilder();
StringBuilder phrase = new StringBuilder();
StringBuilder line = new StringBuilder();
String phraseString = "";
boolean addCharToWord = false;
for (int i = 0; i < length; i++) {
c = oldString.charAt(i);
addCharToWord = false;
Log.d("ROIGR", "char: " + c + " :" + Character.getDirectionality(c));
// Log.d("ROIGR", "hex : " + (int)c);
if (RtlUtils.isLtr(c)){
CurRtlType = RtlUtils.characterType.ltr;
} else if (RtlUtils.isRtl(c)) {
CurRtlType = RtlUtils.characterType.rtl;
}
if ((CurRtlType == PhraseRtlType) && !(RtlUtils.isSpaceSign(c) || RtlUtils.isEndLineSign(c))){
Log.d("ROIGR", "add: " + c + " to: " + word);
word.append(c);
addCharToWord = true;
if (i < length - 1) {
continue;
}
}
do {
if (line.length() + phrase.length() + word.length() < line_max_size) {
if (RtlUtils.isSpaceSign(c)) {
word.append(c);
addCharToWord = true;
}
phrase.append(word);
word.setLength(0);
if (RtlUtils.isSpaceSign(c)) {
break;
}
}
phraseString = phrase.toString();
Log.d("ROIGR", "phrase: |" + phraseString + "|");
if (PhraseRtlType == RtlUtils.characterType.rtl) {
if (RtlUtils.contextualSupport()) {
phraseString = RtlUtils.converToContextual(phraseString);
}
phraseString = RtlUtils.reverse(phraseString);
}
line.insert(0, RtlUtils.fixWhitespace(phraseString));
Log.d("ROIGR", "line now: |" + line + "|");
phrase.setLength(0);
if (word.length() > 0){
line.append('\n');
} else if (RtlUtils.isEndLineSign(c)) {
line.append(c);
} else if (!addCharToWord) {
word.append(c);
if (i == length - 1){
addCharToWord = true;
continue;
}
PhraseRtlType = PhraseRtlType == RtlUtils.characterType.rtl ? RtlUtils.characterType.ltr : RtlUtils.characterType.rtl;
break;
}
lines.add(line.toString());
Log.d("ROIGR", "line: |" + line + "|");
line.setLength(0);
if (word.length() == 0){
break;
}
} while (true);
}
lines.add(line.toString());
newString = TextUtils.join("", lines);
Log.d("ROIGR", "after : |" + org.apache.commons.lang3.StringEscapeUtils.escapeJava(newString) + "|");
return newString;
}
}