1 : <?php
2 :
3 : /**
4 : * Dictionaries of Old French and Latin
5 : *
6 : * PHP 5
7 : *
8 : * @category Application
9 : * @package DicFro
10 : * @author Michel Corne <mcorne@yahoo.com>
11 : * @copyright 2008-2010 Michel Corne
12 : * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3 (GPLv3)
13 : * @link http://www.micmap.org/dicfro
14 : */
15 :
16 : /**
17 : * Converts strings
18 : *
19 : * @category Application
20 : * @package DicFro
21 : * @author Michel Corne <mcorne@yahoo.com>
22 : * @copyright 2008-2010 Michel Corne
23 : * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3 (GPLv3)
24 : */
25 :
26 : class Common_String {
/**
 * Lookup table used to strip the diacritics from upper case letters
 *
 * The 'search' and 'replace' lists are parallel: the letter found at a given
 * position in 'search' is replaced with the ASCII letter found at the same
 * position in 'replace'. Only upper case letters are listed.
 *
 * @var array
 */
public $accentuated = array(
    // accented letters, one base letter per line
    'search' => array(
        'Á', 'À', 'Â', 'Ä',
        'Ç',
        'É', 'È', 'Ê', 'Ë',
        'Í', 'Ì', 'Î', 'Ï',
        'Ñ',
        'Ó', 'Ò', 'Ô', 'Ö',
        'Ú', 'Ù', 'Û', 'Ü',
        'Ÿ',
        // Latin vowels (AEIOU) with a macron, then with a breve
        'Ā', 'Ă',
        'Ē', 'Ĕ',
        'Ī', 'Ĭ',
        'Ō', 'Ŏ',
        'Ū', 'Ŭ',
    ),
    // matching ASCII letters, in the same order
    'replace' => array(
        'A', 'A', 'A', 'A',
        'C',
        'E', 'E', 'E', 'E',
        'I', 'I', 'I', 'I',
        'N',
        'O', 'O', 'O', 'O',
        'U', 'U', 'U', 'U',
        'Y',
        // vowels with a macron or a breve
        'A', 'A',
        'E', 'E',
        'I', 'I',
        'O', 'O',
        'U', 'U',
    ),
);
45 :
/**
 * Mapping of non ASCII UTF-8 characters to DOS code page 850 bytes
 *
 * The 'search' and 'replace' lists are parallel and laid out as the upper
 * half of the code page, 16 entries per row: the entry at offset N in
 * 'search' is the UTF-8 character whose CP850 code is 0x80 + N, and the
 * entry at offset N in 'replace' is that single byte.
 * An empty 'search' entry marks a code point that is not mapped; it only
 * keeps the two lists aligned.
 *
 * @var array
 * @link http://en.wikipedia.org/wiki/Code_page_850
 */
public $utf8ToCP850 = array(
    'search' => array(
        // 0x80 - 0x8F
        'Ç', 'ü', 'é', 'â', 'ä', 'à', 'å', 'ç', 'ê', 'ë', 'è', 'ï', 'î', 'ì', 'Ä', 'Å',
        // 0x90 - 0x9F
        'É', 'æ', 'Æ', 'ô', 'ö', 'ò', 'û', 'ù', 'ÿ', 'Ö', 'Ü', 'ø', '£', 'Ø', '×', 'ƒ',
        // 0xA0 - 0xAF
        'á', 'í', 'ó', 'ú', 'ñ', 'Ñ', 'ª', 'º', '¿', '®', '¬', '½', '¼', '¡', '«', '»',
        // 0xB0 - 0xBF
        '', '', '', '', '', 'Á', 'Â', 'À', '©', '', '', '', '', '¢', '¥', '',
        // 0xC0 - 0xCF
        '', '', '', '', '', '', 'ã', 'Ã', '', '', '', '', '', '', '', '¤',
        // 0xD0 - 0xDF
        'ð', 'Ð', 'Ê', 'Ë', 'È', '', 'Í', 'Î', 'Ï', '', '', '', '', '', 'Ì', '',
        // 0xE0 - 0xEF
        'Ó', 'ß', 'Ô', 'Ò', 'õ', 'Õ', 'µ', 'þ', 'Þ', 'Ú', 'Û', 'Ù', 'ý', 'Ý', '¯', '´',
        // 0xF0 - 0xFF
        '', '±', '', '¾', '¶', '§', '÷', '¸', '°', '¨', '·', '¹', '³', '²', '', '',
    ),
    // every byte must be equal to 0x80 + its offset in the list
    'replace' => array(
        "\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87", "\x88", "\x89", "\x8A", "\x8B", "\x8C", "\x8D", "\x8E", "\x8F",
        "\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97", "\x98", "\x99", "\x9A", "\x9B", "\x9C", "\x9D", "\x9E", "\x9F",
        "\xA0", "\xA1", "\xA2", "\xA3", "\xA4", "\xA5", "\xA6", "\xA7", "\xA8", "\xA9", "\xAA", "\xAB", "\xAC", "\xAD", "\xAE", "\xAF",
        "\xB0", "\xB1", "\xB2", "\xB3", "\xB4", "\xB5", "\xB6", "\xB7", "\xB8", "\xB9", "\xBA", "\xBB", "\xBC", "\xBD", "\xBE", "\xBF",
        "\xC0", "\xC1", "\xC2", "\xC3", "\xC4", "\xC5", "\xC6", "\xC7", "\xC8", "\xC9", "\xCA", "\xCB", "\xCC", "\xCD", "\xCE", "\xCF",
        "\xD0", "\xD1", "\xD2", "\xD3", "\xD4", "\xD5", "\xD6", "\xD7", "\xD8", "\xD9", "\xDA", "\xDB", "\xDC", "\xDD", "\xDE", "\xDF",
        "\xE0", "\xE1", "\xE2", "\xE3", "\xE4", "\xE5", "\xE6", "\xE7", "\xE8", "\xE9", "\xEA", "\xEB", "\xEC", "\xED", "\xEE", "\xEF",
        "\xF0", "\xF1", "\xF2", "\xF3", "\xF4", "\xF5", "\xF6", "\xF7", "\xF8", "\xF9", "\xFA", "\xFB", "\xFC", "\xFD", "\xFE", "\xFF",
    ),
);
/**
 * Initializes the converter
 *
 * Sets the character encoding of the multibyte regex functions to UTF-8.
 *
 * @return void
 */
public function __construct()
{
    $regexEncoding = 'UTF-8';

    mb_regex_encoding($regexEncoding);
}
83 :
/**
 * Turns dash separated words into a camel case identifier
 *
 * Dashes are handled as word separators, each word is capitalized, the
 * very first character is lower cased, and all the spaces are removed,
 * e.g. "foo-bar-baz" becomes "fooBarBaz".
 *
 * @param string $string the string with dash separated words
 * @return string the camel case string
 */
public function dash2CamelCase($string)
{
    // capitalizes each word once the dashes are turned into spaces
    $words = ucwords(str_replace('-', ' ', $string));

    if ($words) {
        // lower cases the first character only
        $head = strtolower($words[0]);
        $words = $head . substr($words, 1);
    }

    // glues the words together
    return str_replace(' ', '', $words);
}
98 :
/**
 * Expands the upper case ligatures of a string
 *
 * "Æ" is replaced with "AE" and "Œ" with "OE". Lower case ligatures are
 * left untouched.
 *
 * @param string $string the UTF-8 string
 * @return string the expanded string
 */
public function expandLigature($string)
{
    // a plain substitution is enough for literal characters: unlike mb_ereg_replace(),
    // it does not depend on the current regex encoding and it does not fail,
    // i.e. lose the whole string, when the input contains malformed UTF-8
    return str_replace(array('Æ', 'Œ'), array('AE', 'OE'), $string);
}
112 :
/**
 * Re-encodes a string from the internal encoding into UTF-8
 *
 * @param string $string the string to convert
 * @param string $internalEncoding the source encoding, defaults to the
 *                                 mbstring internal encoding (used for testing)
 * @return string the UTF-8 string
 */
public function internalToUtf8($string, $internalEncoding = null)
{
    if (!$internalEncoding) {
        // no explicit encoding: falls back on the mbstring internal encoding
        $internalEncoding = mb_internal_encoding();
    }

    $utf8 = mb_convert_encoding($string, 'UTF-8', $internalEncoding);

    return $utf8;
}
127 :
/**
 * Determines if the process is running as DOS shell
 *
 * The process is considered to run in a DOS shell when the OS name begins
 * with "win" and the interface name contains "cli", both case insensitive.
 *
 * @param string $os the name of the OS (used for testing)
 * @param string $sapi the name of the interface (used for testing)
 * @return boolean true if DOS shell, false otherwise
 */
public function isDos($os = PHP_OS, $sapi = PHP_SAPI)
{
    // the OS name must START with "win": a plain substring search would also
    // match "Darwin" and wrongly treat a Mac OS X terminal as a DOS shell
    $isWindows = stripos($os, 'win') === 0;
    $isCli = stripos($sapi, 'cli') !== false;

    return ($isWindows and $isCli);
}
139 :
/**
 * Strips the accents from the letters of a string
 *
 * Each letter listed in the "search" list of the accents table is replaced
 * with the ASCII letter at the same position in its "replace" list; any
 * other character is left as is.
 *
 * @param string $string the string to process
 * @return string the string without accents
 */
public function removeAccents($string)
{
    $accented = $this->accentuated['search'];
    $plain = $this->accentuated['replace'];

    return str_replace($accented, $plain, $string);
}
150 :
/**
 * Converts a string to Latin letters
 *
 * The string is first reduced to upper case ASCII letters, then "J" is
 * replaced with "I" and "U" with "V".
 *
 * @param string $string the string to process
 * @return string the converted string
 */
public function toLatin($string)
{
    // upper case, no ligatures, no accents, letters only
    $ascii = $this->utf8toASCII($string);

    return strtr($ascii, array('J' => 'I', 'U' => 'V'));
}
165 :
/**
 * Upper cases a UTF-8 string
 *
 * @param string $string the UTF-8 string to convert
 * @return string the string in upper case letters
 */
public function toUpper($string)
{
    $upper = mb_strtoupper($string, 'UTF-8');

    return $upper;
}
176 :
/**
 * Reduces a UTF-8 string to upper case ASCII characters
 *
 * The string is upper cased, its ligatures are expanded and its accents are
 * removed; whatever then matches the "remove" pattern is deleted. By default
 * everything but the letters A to Z is deleted.
 *
 * @param string $string the string to convert
 * @param string $remove the regex matching the characters to remove
 * @return string the converted string
 */
public function utf8toASCII($string, $remove = '~[^A-Z]~')
{
    // upper case first: only upper case ligatures and accents are handled next
    $normalized = $this->removeAccents(
        $this->expandLigature(
            $this->toUpper($string)
        )
    );

    // drops the remaining unwanted characters: dashes, apostrophes ...
    return preg_replace($remove, '', $normalized);
}
195 :
/**
 * Reduces a UTF-8 string to upper case ASCII letters, digits, dashes and stars
 *
 * Same conversion as utf8toASCII() except that the following characters are
 * kept: dashes as in "car-", stars as in "prendre*", and the digits 2 to 9
 * as in "sol3" (see vandaele). The digits 0 and 1 are removed.
 *
 * @param string $string the string to convert
 * @return string the converted string
 */
public function utf8toASCIIorDigit($string)
{
    // everything but upper case letters, digits 2 to 9, dashes and stars
    $unwanted = '~[^A-Z2-9\-\*]~';

    return $this->utf8toASCII($string, $unwanted);
}
208 :
/**
 * Converts a string or an array of strings from UTF-8 to the internal encoding
 *
 * Arrays are processed recursively, element by element, and their keys are
 * preserved. Anything else is converted as a single string.
 *
 * @param mixed $mixed the string or array of strings to convert
 * @return mixed the converted string or array of strings
 */
public function utf8ToInternal($mixed)
{
    if (!is_array($mixed)) {
        return $this->utf8ToInternalString($mixed);
    }

    // converts each element in turn, nested arrays included
    return array_map(array($this, 'utf8ToInternal'), $mixed);
}
225 :
/**
 * Converts a string from UTF-8 to the encoding used for displaying
 *
 * In a DOS shell, the characters listed in the CP850 table are replaced with
 * their single byte equivalent and the other characters are left as is.
 * Otherwise, the string is converted to the internal encoding.
 *
 * @param string $string the string to convert
 * @param string $internalEncoding the target encoding, defaults to the
 *                                 mbstring internal encoding (used for testing)
 * @param boolean $isDos true for DOS shell, false otherwise, detected
 *                       if null (used for testing)
 * @return string the converted string
 */
public function utf8ToInternalString($string, $internalEncoding = null, $isDos = null)
{
    if ($isDos === null) {
        // not forced by the caller: detects the DOS shell
        $isDos = $this->isDos();
    }

    if ($isDos) {
        // DOS shell: maps the characters to the CP850 charset
        $table = $this->utf8ToCP850;

        return str_replace($table['search'], $table['replace'], $string);
    }

    if (!$internalEncoding) {
        // no explicit encoding: falls back on the mbstring internal encoding
        $internalEncoding = mb_internal_encoding();
    }

    return mb_convert_encoding($string, $internalEncoding, 'UTF-8');
}
|