1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29:
/**
 * Helpers for turning arbitrary UTF-8 text into URL-friendly strings.
 *
 * All methods are static; the class keeps no instance state. Per-request
 * caches are held in function-level static variables.
 */
class ElggTranslit {

    /**
     * Build a URL-friendly slug from a UTF-8 string.
     *
     * Steps, in order: optional Unicode normalization, transliteration of
     * known accented Latin characters to ASCII, unwrapping of HTML-like
     * opening tags, currency substitutions, removal of ASCII punctuation and
     * control characters, lower-casing, splitting into words on the remaining
     * ASCII non-alphanumerics, URL-encoding each word and joining the words
     * with the separator.
     *
     * @param string $string    UTF-8 text to convert
     * @param string $separator Glue placed between words (not URL-encoded)
     *
     * @return string Lower-cased, URL-encoded words joined by $separator;
     *                an empty string when no words remain
     */
    public static function urlize($string, $separator = '-') {
        // Prefer precomposed characters: the transliteration map is keyed on
        // them. If normalization does not return a string, keep the input.
        if (self::hasNormalizerSupport()) {
            $nfc = normalizer_normalize($string);
            if (is_string($nfc)) {
                $string = $nfc;
            }
        }

        $string = self::transliterateAscii($string);

        // Keep the inside of "<tag ...>" as plain words instead of dropping it.
        // Closing tags ("</tag>") do not match and are handled by the
        // punctuation removal below.
        $string = preg_replace('~<([a-zA-Z][^>]*)>~', ' $1 ', $string);

        // Currency symbols become short ASCII words.
        $string = strtr($string, array(
            "\xE2\x82\xAC" => ' E ',   // euro sign
            "\xC2\xA3" => ' GBP ',     // pound sign
        ));

        // Remove every ASCII byte except 0-9, a-z, A-Z, hyphen, underscore,
        // tab, LF, CR and space. Bytes >= 0x80 (multibyte UTF-8) are untouched.
        $string = preg_replace('~['
            . '\x00-\x08'  // control chars
            . '\x0b\x0c'   // vertical tab, form feed
            . '\x0e-\x1f'  // control chars
            . '\x21-\x2c'  // ! ... ,
            . '\x2e\x2f'   // . and /
            . '\x3a-\x40'  // : ... @
            . '\x5b-\x5e'  // [ ... ^
            . '\x60'       // backtick
            . '\x7b-\x7f'  // { ... DEL
            . ']~', '', $string);

        // Lower-case with multibyte awareness when mbstring is available.
        $string = is_callable('mb_strtolower')
            ? mb_strtolower($string, 'UTF-8')
            : strtolower($string);

        // Split on runs of ASCII bytes that are not 0-9, a-z or A-Z. The class
        // is spelled out as byte ranges so that bytes >= 0x80 never act as
        // separators.
        $pattern = '~['
            . '\x00-\x2f'  // controls ... /
            . '\x3a-\x40'  // : ... @
            . '\x5b-\x60'  // [ ... backtick
            . '\x7b-\x7f'  // { ... DEL
            . ']+~x';

        $words = preg_split($pattern, $string, -1, PREG_SPLIT_NO_EMPTY);

        // Whatever is left that is not URL-safe gets percent-encoded.
        $words = array_map('urlencode', $words);

        return implode($separator, $words);
    }

    /**
     * Replace known accented Latin characters with ASCII equivalents.
     *
     * Characters that are not keys of the map returned by
     * getAsciiTranslitMap() are left unchanged.
     *
     * @param string $utf8 UTF-8 text
     *
     * @return string The text with mapped characters replaced
     */
    public static function transliterateAscii($utf8) {
        static $map = null;

        // Nothing to do when the string has no bytes >= 0x80.
        if (!preg_match('/[\x80-\xff]/', $utf8)) {
            return $utf8;
        }

        // Build the map once and reuse it on later calls.
        if (null === $map) {
            $map = self::getAsciiTranslitMap();
        }

        return strtr($utf8, $map);
    }

    /**
     * Get the transliteration map used by transliterateAscii().
     *
     * @return array Map of UTF-8 byte sequences (keys) to ASCII replacement
     *               strings (values)
     */
    public static function getAsciiTranslitMap() {
        return array(
            // Latin-1 Supplement
            "\xC2\xAA" => 'a', "\xC2\xBA" => 'o', "\xC3\x80" => 'A',
            "\xC3\x81" => 'A', "\xC3\x82" => 'A', "\xC3\x83" => 'A',
            "\xC3\x84" => 'A', "\xC3\x85" => 'A', "\xC3\x86" => 'AE',
            "\xC3\x87" => 'C', "\xC3\x88" => 'E', "\xC3\x89" => 'E',
            "\xC3\x8A" => 'E', "\xC3\x8B" => 'E', "\xC3\x8C" => 'I',
            "\xC3\x8D" => 'I', "\xC3\x8E" => 'I', "\xC3\x8F" => 'I',
            "\xC3\x90" => 'D', "\xC3\x91" => 'N', "\xC3\x92" => 'O',
            "\xC3\x93" => 'O', "\xC3\x94" => 'O', "\xC3\x95" => 'O',
            "\xC3\x96" => 'O', "\xC3\x99" => 'U', "\xC3\x9A" => 'U',
            "\xC3\x9B" => 'U', "\xC3\x9C" => 'U', "\xC3\x9D" => 'Y',
            "\xC3\x9E" => 'TH', "\xC3\x9F" => 'ss', "\xC3\xA0" => 'a',
            "\xC3\xA1" => 'a', "\xC3\xA2" => 'a', "\xC3\xA3" => 'a',
            "\xC3\xA4" => 'a', "\xC3\xA5" => 'a', "\xC3\xA6" => 'ae',
            "\xC3\xA7" => 'c', "\xC3\xA8" => 'e', "\xC3\xA9" => 'e',
            "\xC3\xAA" => 'e', "\xC3\xAB" => 'e', "\xC3\xAC" => 'i',
            "\xC3\xAD" => 'i', "\xC3\xAE" => 'i', "\xC3\xAF" => 'i',
            "\xC3\xB0" => 'd', "\xC3\xB1" => 'n', "\xC3\xB2" => 'o',
            "\xC3\xB3" => 'o', "\xC3\xB4" => 'o', "\xC3\xB5" => 'o',
            "\xC3\xB6" => 'o', "\xC3\xB8" => 'o', "\xC3\xB9" => 'u',
            "\xC3\xBA" => 'u', "\xC3\xBB" => 'u', "\xC3\xBC" => 'u',
            "\xC3\xBD" => 'y', "\xC3\xBE" => 'th', "\xC3\xBF" => 'y',
            "\xC3\x98" => 'O',

            // Latin Extended-A
            "\xC4\x80" => 'A', "\xC4\x81" => 'a', "\xC4\x82" => 'A',
            "\xC4\x83" => 'a', "\xC4\x84" => 'A', "\xC4\x85" => 'a',
            "\xC4\x86" => 'C', "\xC4\x87" => 'c', "\xC4\x88" => 'C',
            "\xC4\x89" => 'c', "\xC4\x8A" => 'C', "\xC4\x8B" => 'c',
            "\xC4\x8C" => 'C', "\xC4\x8D" => 'c', "\xC4\x8E" => 'D',
            "\xC4\x8F" => 'd', "\xC4\x90" => 'D', "\xC4\x91" => 'd',
            "\xC4\x92" => 'E', "\xC4\x93" => 'e', "\xC4\x94" => 'E',
            "\xC4\x95" => 'e', "\xC4\x96" => 'E', "\xC4\x97" => 'e',
            "\xC4\x98" => 'E', "\xC4\x99" => 'e', "\xC4\x9A" => 'E',
            "\xC4\x9B" => 'e', "\xC4\x9C" => 'G', "\xC4\x9D" => 'g',
            "\xC4\x9E" => 'G', "\xC4\x9F" => 'g', "\xC4\xA0" => 'G',
            "\xC4\xA1" => 'g', "\xC4\xA2" => 'G', "\xC4\xA3" => 'g',
            "\xC4\xA4" => 'H', "\xC4\xA5" => 'h', "\xC4\xA6" => 'H',
            "\xC4\xA7" => 'h', "\xC4\xA8" => 'I', "\xC4\xA9" => 'i',
            "\xC4\xAA" => 'I', "\xC4\xAB" => 'i', "\xC4\xAC" => 'I',
            "\xC4\xAD" => 'i', "\xC4\xAE" => 'I', "\xC4\xAF" => 'i',
            "\xC4\xB0" => 'I', "\xC4\xB1" => 'i', "\xC4\xB2" => 'IJ',
            "\xC4\xB3" => 'ij', "\xC4\xB4" => 'J', "\xC4\xB5" => 'j',
            "\xC4\xB6" => 'K', "\xC4\xB7" => 'k', "\xC4\xB8" => 'k',
            "\xC4\xB9" => 'L', "\xC4\xBA" => 'l', "\xC4\xBB" => 'L',
            "\xC4\xBC" => 'l', "\xC4\xBD" => 'L', "\xC4\xBE" => 'l',
            "\xC4\xBF" => 'L', "\xC5\x80" => 'l', "\xC5\x81" => 'L',
            "\xC5\x82" => 'l', "\xC5\x83" => 'N', "\xC5\x84" => 'n',
            "\xC5\x85" => 'N', "\xC5\x86" => 'n', "\xC5\x87" => 'N',
            // U+0149 is a small letter; U+014A / U+014B are capital / small eng.
            "\xC5\x88" => 'n', "\xC5\x89" => 'n', "\xC5\x8A" => 'N',
            "\xC5\x8B" => 'n', "\xC5\x8C" => 'O', "\xC5\x8D" => 'o',
            "\xC5\x8E" => 'O', "\xC5\x8F" => 'o', "\xC5\x90" => 'O',
            "\xC5\x91" => 'o', "\xC5\x92" => 'OE', "\xC5\x93" => 'oe',
            "\xC5\x94" => 'R', "\xC5\x95" => 'r', "\xC5\x96" => 'R',
            "\xC5\x97" => 'r', "\xC5\x98" => 'R', "\xC5\x99" => 'r',
            "\xC5\x9A" => 'S', "\xC5\x9B" => 's', "\xC5\x9C" => 'S',
            "\xC5\x9D" => 's', "\xC5\x9E" => 'S', "\xC5\x9F" => 's',
            "\xC5\xA0" => 'S', "\xC5\xA1" => 's', "\xC5\xA2" => 'T',
            "\xC5\xA3" => 't', "\xC5\xA4" => 'T', "\xC5\xA5" => 't',
            "\xC5\xA6" => 'T', "\xC5\xA7" => 't', "\xC5\xA8" => 'U',
            "\xC5\xA9" => 'u', "\xC5\xAA" => 'U', "\xC5\xAB" => 'u',
            "\xC5\xAC" => 'U', "\xC5\xAD" => 'u', "\xC5\xAE" => 'U',
            "\xC5\xAF" => 'u', "\xC5\xB0" => 'U', "\xC5\xB1" => 'u',
            "\xC5\xB2" => 'U', "\xC5\xB3" => 'u', "\xC5\xB4" => 'W',
            "\xC5\xB5" => 'w', "\xC5\xB6" => 'Y', "\xC5\xB7" => 'y',
            "\xC5\xB8" => 'Y', "\xC5\xB9" => 'Z', "\xC5\xBA" => 'z',
            "\xC5\xBB" => 'Z', "\xC5\xBC" => 'z', "\xC5\xBD" => 'Z',
            "\xC5\xBE" => 'z', "\xC5\xBF" => 's',

            // S and T with comma below (U+0218 - U+021B)
            "\xC8\x98" => 'S', "\xC8\x99" => 's',
            "\xC8\x9A" => 'T', "\xC8\x9B" => 't',

            // O and U with horn (U+01A0, U+01A1, U+01AF, U+01B0)
            "\xC6\xA0" => 'O', "\xC6\xA1" => 'o',
            "\xC6\xAF" => 'U', "\xC6\xB0" => 'u',

            // Latin Extended Additional: vowels with grave
            "\xE1\xBA\xA6" => 'A', "\xE1\xBA\xA7" => 'a',
            "\xE1\xBA\xB0" => 'A', "\xE1\xBA\xB1" => 'a',
            "\xE1\xBB\x80" => 'E', "\xE1\xBB\x81" => 'e',
            "\xE1\xBB\x92" => 'O', "\xE1\xBB\x93" => 'o',
            "\xE1\xBB\x9C" => 'O', "\xE1\xBB\x9D" => 'o',
            "\xE1\xBB\xAA" => 'U', "\xE1\xBB\xAB" => 'u',
            "\xE1\xBB\xB2" => 'Y', "\xE1\xBB\xB3" => 'y',

            // Latin Extended Additional: vowels with hook above
            "\xE1\xBA\xA2" => 'A', "\xE1\xBA\xA3" => 'a',
            "\xE1\xBA\xA8" => 'A', "\xE1\xBA\xA9" => 'a',
            "\xE1\xBA\xB2" => 'A', "\xE1\xBA\xB3" => 'a',
            "\xE1\xBA\xBA" => 'E', "\xE1\xBA\xBB" => 'e',
            "\xE1\xBB\x82" => 'E', "\xE1\xBB\x83" => 'e',
            "\xE1\xBB\x88" => 'I', "\xE1\xBB\x89" => 'i',
            "\xE1\xBB\x8E" => 'O', "\xE1\xBB\x8F" => 'o',
            "\xE1\xBB\x94" => 'O', "\xE1\xBB\x95" => 'o',
            "\xE1\xBB\x9E" => 'O', "\xE1\xBB\x9F" => 'o',
            "\xE1\xBB\xA6" => 'U', "\xE1\xBB\xA7" => 'u',
            "\xE1\xBB\xAC" => 'U', "\xE1\xBB\xAD" => 'u',
            "\xE1\xBB\xB6" => 'Y', "\xE1\xBB\xB7" => 'y',

            // Latin Extended Additional: vowels with tilde
            "\xE1\xBA\xAA" => 'A', "\xE1\xBA\xAB" => 'a',
            "\xE1\xBA\xB4" => 'A', "\xE1\xBA\xB5" => 'a',
            "\xE1\xBA\xBC" => 'E', "\xE1\xBA\xBD" => 'e',
            "\xE1\xBB\x84" => 'E', "\xE1\xBB\x85" => 'e',
            "\xE1\xBB\x96" => 'O', "\xE1\xBB\x97" => 'o',
            "\xE1\xBB\xA0" => 'O', "\xE1\xBB\xA1" => 'o',
            "\xE1\xBB\xAE" => 'U', "\xE1\xBB\xAF" => 'u',
            "\xE1\xBB\xB8" => 'Y', "\xE1\xBB\xB9" => 'y',

            // Latin Extended Additional: vowels with acute
            "\xE1\xBA\xA4" => 'A', "\xE1\xBA\xA5" => 'a',
            "\xE1\xBA\xAE" => 'A', "\xE1\xBA\xAF" => 'a',
            "\xE1\xBA\xBE" => 'E', "\xE1\xBA\xBF" => 'e',
            "\xE1\xBB\x90" => 'O', "\xE1\xBB\x91" => 'o',
            "\xE1\xBB\x9A" => 'O', "\xE1\xBB\x9B" => 'o',
            "\xE1\xBB\xA8" => 'U', "\xE1\xBB\xA9" => 'u',

            // Latin Extended Additional: vowels with dot below
            "\xE1\xBA\xA0" => 'A', "\xE1\xBA\xA1" => 'a',
            "\xE1\xBA\xAC" => 'A', "\xE1\xBA\xAD" => 'a',
            "\xE1\xBA\xB6" => 'A', "\xE1\xBA\xB7" => 'a',
            "\xE1\xBA\xB8" => 'E', "\xE1\xBA\xB9" => 'e',
            "\xE1\xBB\x86" => 'E', "\xE1\xBB\x87" => 'e',
            "\xE1\xBB\x8A" => 'I', "\xE1\xBB\x8B" => 'i',
            "\xE1\xBB\x8C" => 'O', "\xE1\xBB\x8D" => 'o',
            "\xE1\xBB\x98" => 'O', "\xE1\xBB\x99" => 'o',
            "\xE1\xBB\xA2" => 'O', "\xE1\xBB\xA3" => 'o',
            "\xE1\xBB\xA4" => 'U', "\xE1\xBB\xA5" => 'u',
            "\xE1\xBB\xB0" => 'U', "\xE1\xBB\xB1" => 'u',
            "\xE1\xBB\xB4" => 'Y', "\xE1\xBB\xB5" => 'y',
        );
    }

    /**
     * Check whether a working normalizer_normalize() is available.
     *
     * The function must exist and must turn "A" followed by U+030A
     * (combining ring above) into the precomposed U+00C5. The result is
     * computed once and cached for later calls.
     *
     * @return bool True when normalizer_normalize() exists and passes the probe
     */
    public static function hasNormalizerSupport() {
        static $ret = null;

        if (null === $ret) {
            $form_c = "\xC3\x85";    // precomposed A with ring above
            $form_d = "A\xCC\x8A";   // "A" + combining ring above
            $ret = (function_exists('normalizer_normalize')
                && $form_c === normalizer_normalize($form_d));
        }

        return $ret;
    }
}
270: