mirror of
https://codeberg.org/Freeyourgadget/Gadgetbridge.git
synced 2025-01-27 09:01:38 +01:00
Improve ASCII transliterator
We should use NFKD instead of NFD, since we flatten the text to US-ASCII afterwards anyway. This allows various Unicode characters that would otherwise end up as a question mark to be represented by their compatibility decomposition. For example, ligatures are now expanded (U+FB01 LATIN SMALL LIGATURE FI is replaced with plain "fi" instead of a question mark), and U+00A0 NO-BREAK SPACE (NBSP) is replaced by a normal space instead of a question mark. Additionally: add Czech fancy quotes to the Czech transliterator, and add a unit test for MultiTransliterator.
This commit is contained in:
parent
99b3dc3226
commit
ec73b244ee
@ -27,6 +27,7 @@ public class CzechTransliterator extends SimpleTransliterator {
|
|||||||
super(new HashMap<Character, String>() {{
|
super(new HashMap<Character, String>() {{
|
||||||
put('ř',"r"); put('ě',"e"); put('ý',"y"); put('á',"a"); put('í',"i"); put('é',"e");
|
put('ř',"r"); put('ě',"e"); put('ý',"y"); put('á',"a"); put('í',"i"); put('é',"e");
|
||||||
put('ó',"o"); put('ú',"u"); put('ů',"u"); put('ď',"d"); put('ť',"t"); put('ň',"n");
|
put('ó',"o"); put('ú',"u"); put('ů',"u"); put('ď',"d"); put('ť',"t"); put('ň',"n");
|
||||||
|
put('„', "\""); put('“', "\""); put('‚', "'"); put('‘', "'");
|
||||||
}});
|
}});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -32,8 +32,8 @@ public class FlattenToAsciiTransliterator implements Transliterator {
|
|||||||
return txt;
|
return txt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decompose the string into its canonical decomposition (splits base characters from accents/marks)
|
// Decompose the string into its compatibility decomposition (splits base characters from accents/marks, and replaces compatibility characters such as ligatures and no-break spaces with their plain equivalents)
|
||||||
txt = Normalizer.normalize(txt, Normalizer.Form.NFD);
|
txt = Normalizer.normalize(txt, Normalizer.Form.NFKD);
|
||||||
// Remove all marks (characters intended to be combined with another character), keeping the base glyphs
|
// Remove all marks (characters intended to be combined with another character), keeping the base glyphs
|
||||||
txt = txt.replaceAll("\\p{M}", "");
|
txt = txt.replaceAll("\\p{M}", "");
|
||||||
// Flatten the resulting string to ASCII
|
// Flatten the resulting string to ASCII
|
||||||
|
@ -4,9 +4,13 @@ import android.content.SharedPreferences;
|
|||||||
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
import nodomain.freeyourgadget.gadgetbridge.GBApplication;
|
import nodomain.freeyourgadget.gadgetbridge.GBApplication;
|
||||||
import nodomain.freeyourgadget.gadgetbridge.impl.GBDevice;
|
import nodomain.freeyourgadget.gadgetbridge.impl.GBDevice;
|
||||||
import nodomain.freeyourgadget.gadgetbridge.test.TestBase;
|
import nodomain.freeyourgadget.gadgetbridge.test.TestBase;
|
||||||
|
import nodomain.freeyourgadget.gadgetbridge.util.language.impl.CzechTransliterator;
|
||||||
|
import nodomain.freeyourgadget.gadgetbridge.util.language.impl.ExtendedAsciiTransliterator;
|
||||||
import nodomain.freeyourgadget.gadgetbridge.util.language.impl.FlattenToAsciiTransliterator;
|
import nodomain.freeyourgadget.gadgetbridge.util.language.impl.FlattenToAsciiTransliterator;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
@ -227,12 +231,23 @@ public class LanguageUtilsTest extends TestBase {
|
|||||||
@Test
|
@Test
|
||||||
public void testFlattenToAscii() throws Exception {
|
public void testFlattenToAscii() throws Exception {
|
||||||
final FlattenToAsciiTransliterator transliterator = new FlattenToAsciiTransliterator();
|
final FlattenToAsciiTransliterator transliterator = new FlattenToAsciiTransliterator();
|
||||||
String input = "ä ș ț ă";
|
String input = "ä ș ț ă fine";
|
||||||
String output = transliterator.transliterate(input);
|
String output = transliterator.transliterate(input);
|
||||||
String expected = "a s t a";
|
String expected = "a s t a fine";
|
||||||
assertEquals("flatten to ascii transliteration failed", expected, output);
|
assertEquals("flatten to ascii transliteration failed", expected, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultitransliterator() throws Exception {
|
||||||
|
final MultiTransliterator multiTransliterator = new MultiTransliterator(Arrays.asList(
|
||||||
|
new CzechTransliterator(),
|
||||||
|
new ExtendedAsciiTransliterator(),
|
||||||
|
new FlattenToAsciiTransliterator()
|
||||||
|
));
|
||||||
|
assertEquals("Zlutoucky kun upel \"dabelske\" \"ody\"", multiTransliterator.transliterate("Žluťoučký kůň úpěl »ďábelské« „ódy“"));
|
||||||
|
assertEquals("300 Kc", multiTransliterator.transliterate("300\u00A0Kč"));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTransliterateOption() throws Exception {
|
public void testTransliterateOption() throws Exception {
|
||||||
enableTransliteration(false);
|
enableTransliteration(false);
|
||||||
|
Loading…
Reference in New Issue
Block a user