Skip to content

Commit 3e3533c

Browse files
authored
Merge pull request #25 from osori/codex/fix-issue-with-korean-romanizer
Fix issue #11 by romanizing isolated jamo
2 parents 763da9c + d552c53 commit 3e3533c

File tree

2 files changed

+22
-10
lines changed

2 files changed

+22
-10
lines changed

korean_romanizer/romanizer.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import re
22

3-
from korean_romanizer.syllable import Syllable
3+
from korean_romanizer.syllable import (
4+
Syllable,
5+
unicode_compatible_consonants,
6+
unicode_initial,
7+
)
48
from korean_romanizer.pronouncer import Pronouncer
59

610
'''
@@ -96,6 +100,13 @@
96100

97101
None: '',
98102
}
103+
104+
# Compatibility jamo (e.g. ㄱ, ㄴ) do not appear as part of a full syllable.
105+
# Map them to their onset romanization so single jamo can be transliterated.
106+
compat_onset = {
107+
comp: onset[unicode_initial[i]]
108+
for i, comp in enumerate(unicode_compatible_consonants)
109+
}
99110

100111
class Romanizer(object):
101112
def __init__(self, text):
@@ -111,13 +122,14 @@ def romanize(self):
111122

112123
if not s.medial and not s.final:
113124
# s is NOT a full syllable (e.g. a standalone jamo or a non-Hangul character)
114-
# if onset.get(chr(s.initial)):
115-
# _romanized += onset[chr(s.initial)]
116-
# elif vowel.get(chr(s.initial)):
117-
# _romanized += vowel[chr(s.initial)]
118-
# else:
119-
# _romanized += char
120-
_romanized += char
125+
if char in vowel:
126+
_romanized += vowel[char]
127+
elif char in onset:
128+
_romanized += onset[char]
129+
elif char in compat_onset:
130+
_romanized += compat_onset[char]
131+
else:
132+
_romanized += char
121133
else:
122134
# s is a full syllable
123135
_romanized += onset[s.initial] + vowel[s.medial] + coda[s.final]

tests/test_romanizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ def test_double_consonant_final_without_next_syllable():
7070

7171

7272
def test_non_syllables():
73-
assert romanize("ㅠㄴㅁㄱ") == "ㅠㄴㅁㄱ"
74-
assert romanize("ㅠ동") == "ㅠdong"
73+
assert romanize("ㅠㄴㅁㄱ") == "yunmg"
74+
assert romanize("ㅠ동") == "yudong"
7575

7676

7777
def test_coda_h():

0 commit comments

Comments
 (0)