1
1
import enchant ,sys
2
2
3
+ # requires PyEnchant library
4
+
3
5
# Python 3 has no `unicode` builtin; alias it to `str` so the code runs on both Python 2 and 3
4
6
if sys .version_info [0 ] > 2 :
5
7
unicode = str
6
8
7
9
8
10
def __concat (object1 , object2 ):
11
+
9
12
if isinstance (object1 , str ) or isinstance (object1 , unicode ):
10
13
object1 = [object1 ]
11
14
if isinstance (object2 , str ) or isinstance (object2 , unicode ):
@@ -14,10 +17,12 @@ def __concat(object1, object2):
14
17
15
18
16
19
def __capitalize_first_char(word):
    """Return ``word`` with its first character upper-cased.

    Only the first character is changed; the rest of the string is kept
    exactly as given. An empty string is returned unchanged.

    :param word: the string to capitalize.
    :return: a new string whose first character is upper-cased.
    """
    # Slicing (word[:1]) instead of indexing (word[0]) avoids an IndexError
    # when ``word`` is empty, and is identical for non-empty input.
    return word[:1].upper() + word[1:]
18
22
19
23
20
- def split (word , language = 'en_us' ):
24
+ def __split (word , language = 'en_US' ):
25
+
21
26
dictionary = enchant .Dict (language )
22
27
max_index = len (word )
23
28
for index , char in enumerate (word ):
@@ -32,12 +37,12 @@ def split(word, language='en_us'):
32
37
left_compound = __capitalize_first_char (left_compound )
33
38
is_left_compound_valid_word = len (left_compound ) > 1 and dictionary .check (left_compound )
34
39
if is_left_compound_valid_word and \
35
- ((not split (right_compound_1 , language ) == '' and not right_compound1_upper ) or right_compound_1 == '' ):
36
- return [compound for compound in __concat (left_compound , split (right_compound_1 , language ))\
40
+ ((not __split (right_compound_1 , language ) == '' and not right_compound1_upper ) or right_compound_1 == '' ):
41
+ return [compound for compound in __concat (left_compound , __split (right_compound_1 , language ))\
37
42
if not compound == '' ]
38
43
elif is_left_compound_valid_word and word [max_index - index :max_index - index + 1 ] == 's' and \
39
- ((not split (right_compound_2 , language ) == '' and not right_compound2_upper ) or right_compound_2 == '' ):
40
- return [compound for compound in __concat (left_compound , split (right_compound_2 , language ))\
44
+ ((not __split (right_compound_2 , language ) == '' and not right_compound2_upper ) or right_compound_2 == '' ):
45
+ return [compound for compound in __concat (left_compound , __split (right_compound_2 , language ))\
41
46
if not compound == '' ]
42
47
if not word == '' and dictionary .check (word ):
43
48
return word
@@ -47,4 +52,17 @@ def split(word, language='en_us'):
47
52
return ''
48
53
49
54
50
- print (split ("undertake" ))
55
def split(compound_word, language='en_US'):
    """Split a (possibly hyphenated) compound word into its simple words.

    The input is first cut at every '-' and each piece is passed to
    ``__split`` together with ``language``; the pieces' results are
    collected, in order, into one flat list.

    :param compound_word: the word to decompose; may contain hyphens.
    :param language: language tag forwarded to ``__split``
        (default ``'en_US'``).
    :return: list of the simple words found.
    """
    simple_words = []

    for word in compound_word.split('-'):
        result = __split(word, language)
        # __split can return a list of compounds, but also a bare string
        # (the word itself) or '' when nothing matched. Iterating a string
        # would append its individual characters, so wrap it first and
        # drop the empty "no match" value.
        if not isinstance(result, list):
            result = [result] if result else []
        simple_words.extend(result)

    return simple_words
67
+
68
+
0 commit comments