1 : <?php
2 : /**
3 : * Roman de Renart
4 : *
5 : * PHP version 5
6 : *
7 : * @category Rdr
8 : * @package Edit
9 : * @author Michel Corne <mcorne@yahoo.com>
10 : * @copyright 2010 Michel Corne
11 : * @license http://www.opensource.org/licenses/bsd-license.php The BSD License
12 : * @link http://roman-de-renart.blogspot.com/
13 : * @version SVN: $Id$
14 : */
15 :
16 : require_once 'Episode.php';
17 :
18 : /**
19 : * Finding the differences between the text of Meon and the old French text of reference
20 : * in an episode
21 : *
22 : * @category Rdr
23 : * @package Edit
24 : * @author Michel Corne <mcorne@yahoo.com>
25 : * @copyright 2010 Michel Corne
26 : * @license http://www.opensource.org/licenses/bsd-license.php The BSD License
27 : */
28 :
29 : class Differences extends Episode
30 : {
31 : /**
32 : * The format of a difference
33 : */
34 : const DIFFERENCE_FMT = '%s : Méon %s %s (%sFHS %s %s)';
35 :
36 : /**
37 : * The error message reported when the difference cannot be parsed
38 : */
39 : const ERR_PARSE_DIFFERENCE = 'cannot parse difference';
40 :
41 : /**
42 : * The format of the difference part concerning the Martin text
43 : */
44 : const MARTIN_FMT = 'Martin %s %s, ';
45 :
46 : /**
47 : * The message reported when there are no differences
48 : */
49 : const MSG_NO_DIFFERENCE = 'There are no differences.';
50 :
51 : /**
52 : * The keys of the columns used for differences
53 : * @var array
54 : */
55 : public static $differenceKeys = array(
56 : Episode::COL_DIFFERENCES,
57 : Episode::COL_FRO_NUMBERS,
58 : );
59 :
/**
 * Finds the different words between Meon and the old French text of reference
 *
 * The two lists of words are walked in parallel. Whenever the current words
 * differ, the closest pair of identical words located ahead in both lists
 * (smallest sum of the two offsets) is searched for, and the words skipped
 * on each side to reach that pair are reported as removed from Meon
 * or added in the old French text of reference.
 *
 * Words are expected to be strings and are compared as strings,
 * consistently with the reverse indexes used for the search, so that
 * numeric looking words (e.g. "1e1" and "10") are never taken for one another.
 *
 * @param array $meonWords the words from Meon
 * @param array $froWords the words from the old French text of reference
 * @return array the words to remove from Meon
 * and the words to add in the old French text of reference
 * @see http://www.holomind.de/phpnet/diff2.src.php
 */
public function diffMeonFroWords($meonWords, $froWords)
{
    // works on 0-based lists so that positions can safely be compared and incremented
    $meonWords = array_values($meonWords);
    $froWords = array_values($froWords);
    $meonCount = count($meonWords);
    $froCount = count($froWords);

    // builds the reverse indexes: word => positions of the word, in ascending order,
    // empty words are not indexed so that they are never the target of the search
    $meonPositions = array();

    foreach ($meonWords as $position => $word) {
        if ((string) $word !== '') {
            $meonPositions[$word][] = $position;
        }
    }

    $froPositions = array();

    foreach ($froWords as $position => $word) {
        if ((string) $word !== '') {
            $froPositions[$word][] = $position;
        }
    }

    $meonDiff = array();
    $froDiff = array();
    $meonPos = 0;
    $froPos = 0;

    // walks both lists until the end of one of them is reached
    while ($meonPos < $meonCount && $froPos < $froCount) {
        if ((string) $meonWords[$meonPos] === (string) $froWords[$froPos]) {
            // common word, moves to the next word in both lists
            $meonPos++;
            $froPos++;
            continue;
        }

        // searches the closest pair of identical words ahead,
        // defaults to the end of both lists if there is none
        $bestMeon = $meonCount;
        $bestFro = $froCount;
        $meonScan = $meonPos;
        $froScan = $froPos;

        while ($meonScan + $froScan < $bestMeon + $bestFro) {
            // finds the first occurrence in Meon, at or after the scanned position,
            // of the word currently scanned in the text of reference
            $found = -1;

            if ($froScan < $froCount && isset($meonPositions[$froWords[$froScan]])) {
                foreach ($meonPositions[$froWords[$froScan]] as $position) {
                    if ($position >= $meonScan) {
                        $found = $position;
                        break;
                    }
                }
            }

            if ($found >= $meonScan && $found + $froScan < $bestMeon + $bestFro) {
                $bestMeon = $found;
                $bestFro = $froScan;
            }

            // finds the first occurrence in the text of reference, at or after
            // the scanned position, of the word currently scanned in Meon
            $found = -1;

            if ($meonScan < $meonCount && isset($froPositions[$meonWords[$meonScan]])) {
                foreach ($froPositions[$meonWords[$meonScan]] as $position) {
                    if ($position >= $froScan) {
                        $found = $position;
                        break;
                    }
                }
            }

            if ($found >= $froScan && $meonScan + $found < $bestMeon + $bestFro) {
                $bestMeon = $meonScan;
                $bestFro = $found;
            }

            $meonScan++;
            $froScan++;
        }

        // the words skipped to reach the pair are removed from Meon
        while ($meonPos < $bestMeon) {
            $meonDiff[] = $meonWords[$meonPos];
            $meonPos++;
        }

        // or added in the text of reference
        while ($froPos < $bestFro) {
            $froDiff[] = $froWords[$froPos];
            $froPos++;
        }
    }

    // the end of one list is reached, the remaining words of the other list are differences
    while ($meonPos < $meonCount) {
        $meonDiff[] = $meonWords[$meonPos];
        $meonPos++;
    }

    while ($froPos < $froCount) {
        $froDiff[] = $froWords[$froPos];
        $froPos++;
    }

    return array($meonDiff, $froDiff);
}
140 :
/**
 * Compares the Meon line and the old French line of reference of a row
 *
 * The lines are split into words and compared word by word. The result tells
 * which words are removed from Meon, which words are added in the old French
 * text of reference, and whether the added words are all found in the Martin line.
 *
 * When words are only added (resp. only removed) within an existing line,
 * the other side is reported with the "[absent]" (resp. "[supprimé]") marker.
 * When a whole verse is missing on one side, nothing is reported here.
 *
 * @param array $row the texts lines in a row
 * @return mixed the words removed from Meon,
 * and the words added in the old French text of reference,
 * and if the added words are in Martin
 */
public function diffMeonFroMartinLines($row)
{
    $meonText = $row[Episode::COL_MEON_TEXT_FIXED];
    $froText = $row[Episode::COL_FRO_TEXT];
    $martinText = $row[Episode::COL_MARTIN_TEXT_ORIG];

    $meonTokens = $this->splitLine($meonText);
    $froTokens = $this->splitLine($froText);
    list($removedWords, $addedWords) = $this->diffMeonFroWords($meonTokens, $froTokens);

    // checks if the words added in the old French text of reference are also in Martin
    $inMartin = ($addedWords && $this->isFixInMartin($addedWords, $martinText));

    // defaults to no difference, this also covers a verse present on one side only
    $meonRemoved = '';
    $froAdded = '';

    if ($removedWords && $addedWords) {
        // words replaced by other words
        $meonRemoved = $this->implodeLine($meonTokens, $removedWords);
        $froAdded = $this->implodeLine($froTokens, $addedWords);
    } elseif ($addedWords) {
        // words in the text of reference but not in Meon
        if ($meonText) {
            $meonRemoved = '[absent]';
            $froAdded = $this->implodeLine($froTokens, $addedWords, ',');
        }
    } elseif ($removedWords) {
        // words in Meon but not in the text of reference
        if ($froText) {
            $froAdded = '[supprimé]';
            $meonRemoved = $this->implodeLine($meonTokens, $removedWords, ',');
        }
    }

    return array($meonRemoved, $froAdded, $inMartin);
}
206 :
/**
 * Formats the difference found in a row as a string
 *
 * The string is built from the DIFFERENCE_FMT format, the Martin part being
 * inserted (MARTIN_FMT format) only when the added words are in Martin.
 * An empty string is returned when no words are added
 * in the old French text of reference.
 *
 * @param array $row the texts lines in a row
 * @param string $meonRemoved the words removed from Meon
 * @param string $froAdded the words added in the old French text of reference
 * @param bool $inMartin the added words are in Martin if true, false otherwise
 * @return string the difference as a string
 */
public function diffToString($row, $meonRemoved, $froAdded, $inMartin)
{
    if (!$froAdded) {
        // nothing to report
        return '';
    }

    $martinPart = '';

    if ($inMartin) {
        $martinPart = sprintf(
            self::MARTIN_FMT,
            $row[Episode::COL_MARTIN_CHAPTERS],
            $row[Episode::COL_MARTIN_NUMBERS]
        );
    }

    return sprintf(
        self::DIFFERENCE_FMT,
        $row[Episode::COL_FRO_NUMBERS],
        $row[Episode::COL_MEON_NUMBERS],
        $meonRemoved,
        $martinPart,
        $row[Episode::COL_FRO_NUMBERS],
        $froAdded
    );
}
234 :
/**
 * Finds the differences between the text of Meon and the old French text of reference
 *
 * The sheet is read, one difference string is computed for each row
 * (an empty string for a row without difference), the strings are written
 * in the differences column and the sheet is written back.
 *
 * @return string the list of differences or a message if there are no differences
 */
public function findDifferences()
{
    $this->readSheet();

    // computes the difference of each row, keeping one entry per row
    $differences = array();

    foreach ($this->readRows() as $row) {
        $lineDiff = $this->diffMeonFroMartinLines($row);
        $differences[] = $this->diffToString($row, $lineDiff[0], $lineDiff[1], $lineDiff[2]);
    }

    // stores the differences in their own column
    $this->writeColumn($differences, Episode::COL_DIFFERENCES);
    $this->writeSheet();

    // reports the differences, or a message if there are none
    $report = $this->arrayToString($differences);

    if (!$report) {
        $report = self::MSG_NO_DIFFERENCE;
    }

    return $report;
}
263 :
/**
 * Implodes a text line but hiding unchanged words
 *
 * Only the original words that are also in the changed words are kept,
 * separated with a space. The glue is inserted between two kept words
 * that are not next to each other in the original line.
 *
 * Note that every occurrence of a changed word is kept, even if only
 * one of its occurrences actually changed.
 *
 * @param array $originalWords the original words
 * @param array $changedWsords the words added or removed
 * @param string $glue the string replacing unchanged words
 * @return string the imploded text line
 */
public function implodeLine($originalWords, $changedWsords, $glue = '...')
{
    $verse = '';
    // the index of the previously kept word, null as long as no word is kept,
    // the verse itself cannot be tested because a verse such as "0" is falsy
    $prevIdx = null;

    foreach (array_intersect($originalWords, $changedWsords) as $idx => $word) {
        if ($prevIdx !== null) {
            $verse .= ' ';

            if ($idx !== $prevIdx + 1) {
                // some unchanged words were skipped
                $verse .= $glue . ' ';
            }
        }

        $verse .= $word;
        $prevIdx = $idx;
    }

    return $verse;
}
288 :
/**
 * Tells if all the words of a list are found in a Martin text line
 *
 * The Martin line is split into words the same way as the other lines.
 * An empty list of words or an empty Martin line never matches.
 *
 * @param array $words the words to find
 * @param string $martinLine the Martin text line
 * @return boolean true if all the words are found, false otherwise
 */
public function isFixInMartin($words, $martinLine)
{
    $martinWords = $this->splitLine($martinLine);

    if (!$words || !$martinLine) {
        return false;
    }

    // the intersection keeps the keys, it is equal to the list only if no word is missing
    $wordsFound = array_intersect($words, $martinWords);

    return $wordsFound == $words;
}
302 :
/**
 * Splits a line of text into words
 *
 * The words are separated by a space or one of the punctuation marks , ; . : ! ?
 * Empty pieces, e.g. between a comma and the following space, are discarded.
 *
 * @param string $line the line of text to split
 * @return mixed the words of the line of text
 */
public function splitLine($line)
{
    $separatorPattern = '~[,;.:!? ]~';
    $words = preg_split($separatorPattern, $line, -1, PREG_SPLIT_NO_EMPTY);

    return $words;
}
313 :
/**
 * Parses a difference
 *
 * The difference is matched against a pattern derived from the DIFFERENCE_FMT format
 * by Base::completePattern(), using "(.*)" as the capturing sub-pattern.
 * The matching itself is delegated to Base::match() along with the keys
 * of the columns used for differences.
 * NOTE(review): the exact shape of the result and the way ERR_PARSE_DIFFERENCE
 * is reported depend on Base::match(), which is not visible here -- to be confirmed.
 *
 * @param string $difference the difference to parse
 * @return mixed the difference details
 */
public function toCsv($difference)
{
    $parser = new Base;
    $differencePattern = $parser->completePattern(self::DIFFERENCE_FMT, '(.*)');

    return $parser->match(
        $differencePattern,
        $difference,
        self::ERR_PARSE_DIFFERENCE,
        self::$differenceKeys,
        true
    );
}
|