Rdr Edit Tests
Current file: D:\Data\micmap\cgi-bin\rdr\edit\application\Differences.php
Legend: executed not executed dead code

  Coverage
  Classes Functions / Methods Lines
Total
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 8 / 8
100.00%100.00%
100.00% 115 / 115
 
Differences
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 8 / 8
100.00%100.00%
100.00% 115 / 115
 public function diffMeonFroWords($meonWords, $froWords)
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 47 / 47
 public function diffMeonFroMartinLines($row)
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 28 / 28
 public function diffToString($row, $meonRemoved, $froAdded, $inMartin)
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 11 / 11
 public function findDifferences()
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 11 / 11
 public function implodeLine($originalWords, $changedWsords, $glue = '...')
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 10 / 10
 public function isFixInMartin($words, $martinLine)
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 2 / 2
 public function splitLine($line)
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 1 / 1
 public function toCsv($difference)
100.00%100.00%
100.00% 1 / 1
100.00%100.00%
100.00% 5 / 5


       1                 : <?php                                                                                                
       2                 : /**                                                                                                  
       3                 :  * Roman de Renart                                                                                   
       4                 :  *                                                                                                   
       5                 :  * PHP version 5                                                                                     
       6                 :  *                                                                                                   
       7                 :  * @category  Rdr                                                                                    
       8                 :  * @package   Edit                                                                                   
       9                 :  * @author    Michel Corne <mcorne@yahoo.com>                                                        
      10                 :  * @copyright 2010 Michel Corne                                                                      
      11                 :  * @license   http://www.opensource.org/licenses/bsd-license.php The BSD License                     
      12                 :  * @link      http://roman-de-renart.blogspot.com/                                                   
      13                 :  * @version   SVN: $Id$                                                                              
      14                 :  */                                                                                                  
      15                 :                                                                                                      
      16                 : require_once 'Episode.php';                                                                          
      17                 :                                                                                                      
      18                 : /**                                                                                                  
      19                 :  * Finding the differences between the text of Meon and the old French text of reference             
      20                 :  * in an episode                                                                                     
      21                 :  *                                                                                                   
      22                 :  * @category  Rdr                                                                                    
      23                 :  * @package   Edit                                                                                   
      24                 :  * @author    Michel Corne <mcorne@yahoo.com>                                                        
      25                 :  * @copyright 2010 Michel Corne                                                                      
      26                 :  * @license   http://www.opensource.org/licenses/bsd-license.php The BSD License                     
      27                 :  */                                                                                                  
      28                 :                                                                                                      
      29                 : class Differences extends Episode                                                                    
      30                 : {                                                                                                    
      31                 :     /**                                                                                              
      32                 :      * The format of a difference                                                                    
      33                 :      */                                                                                              
      34                 :     const DIFFERENCE_FMT = '%s : Méon %s %s (%sFHS %s %s)';                                         
      35                 :                                                                                                      
      36                 :     /**                                                                                              
      37                 :      * The error message reported when the difference cannot be parsed                               
      38                 :      */                                                                                              
      39                 :     const ERR_PARSE_DIFFERENCE = 'cannot parse difference';                                          
      40                 :                                                                                                      
      41                 :     /**                                                                                              
      42                 :      * The format of the difference part concerning the Martin text                                  
      43                 :      */                                                                                              
      44                 :     const MARTIN_FMT = 'Martin %s %s, ';                                                             
      45                 :                                                                                                      
      46                 :     /**                                                                                              
      47                 :      * The message reported when there are no differences                                            
      48                 :      */                                                                                              
      49                 :     const MSG_NO_DIFFERENCE = 'There are no differences.';                                           
      50                 :                                                                                                      
      51                 :     /**                                                                                              
      52                 :      * The keys of the columns used for differences                                                  
      53                 :      * @var array                                                                                    
      54                 :      */                                                                                              
      55                 :     public static $differenceKeys = array(                                                           
      56                 :         Episode::COL_DIFFERENCES,                                                                    
      57                 :         Episode::COL_FRO_NUMBERS,                                                                    
      58                 :     );                                                                                               
      59                 :                                                                                                      
      60                 :     /**                                                                                              
      61                 :      * Finds the different words between Meon and the old French text of reference                   
      62                 :      *                                                                                               
      63                 :      * @param  array $meonWords the words from Meon                                                  
      64                 :      * @param  array $froWords  the words from the old French text of reference                      
      65                 :      * @return array the words to remove from Meon                                                   
      66                 :      *               and the words to add in the old French text of reference                        
      67                 :      * @see    http://www.holomind.de/phpnet/diff2.src.php                                           
      68                 :      */                                                                                              
      69                 :     public function diffMeonFroWords($meonWords, $froWords)                                          
      70                 :     {                                                                                                
      71               3 :         $t1 = $meonWords;                                                                            
      72               3 :         $t2 = $froWords;                                                                             
      73                 :                                                                                                      
      74                 :         # build a reverse-index array using the line as key and line number as value                 
      75                 :         # don't store blank lines, so they won't be targets of the shortest distance                 
      76                 :         # search                                                                                     
      77               3 :         foreach($t1 as $i=>$x) if ($x>'') $r1[$x][]=$i;                                              
      78               3 :         foreach($t2 as $i=>$x) if ($x>'') $r2[$x][]=$i;                                              
      79                 :                                                                                                      
      80               3 :         $a1=0; $a2=0;   # start at beginning of each list                                            
      81               3 :         $actions=array();                                                                            
      82                 :                                                                                                      
      83                 :         # walk this loop until we reach the end of one of the lists                                  
      84               3 :         while ($a1<count($t1) && $a2<count($t2)) {                                                   
      85                 :             # if we have a common element, save it and go to the next                                
      86               3 :             if ($t1[$a1]==$t2[$a2]) { $actions[]=4; $a1++; $a2++; continue; }                        
      87                 :                                                                                                      
      88                 :             # otherwise, find the shortest move (Manhattan-distance) from the                        
      89                 :             # current location                                                                       
      90               3 :             $best1=count($t1); $best2=count($t2);                                                    
      91               3 :             $s1=$a1; $s2=$a2;                                                                        
      92                 :                                                                                                      
      93               3 :             while(($s1+$s2-$a1-$a2) < ($best1+$best2-$a1-$a2)) {                                     
      94               3 :                 $d=-1;                                                                               
      95               3 :                 foreach((array)@$r1[$t2[$s2]] as $n)                                                 
      96               2 :                     if ($n>=$s1) { $d=$n; break; }                                                   
      97               3 :                 if ($d>=$s1 && ($d+$s2-$a1-$a2)<($best1+$best2-$a1-$a2))                             
      98               3 :                     { $best1=$d; $best2=$s2; }                                                       
      99               3 :                 $d=-1;                                                                               
     100               3 :                 foreach((array)@$r2[$t1[$s1]] as $n)                                                 
     101               2 :                     if ($n>=$s2) { $d=$n; break; }                                                   
     102               3 :                 if ($d>=$s2 && ($s1+$d-$a1-$a2)<($best1+$best2-$a1-$a2))                             
     103               3 :                     { $best1=$s1; $best2=$d; }                                                       
     104               3 :                 $s1++; $s2++;                                                                        
     105               3 :             }                                                                                        
     106               3 :             while ($a1<$best1) { $actions[]=1; $a1++; }  # deleted elements                          
     107               3 :             while ($a2<$best2) { $actions[]=2; $a2++; }  # added elements                            
     108               3 :         }                                                                                            
     109                 :                                                                                                      
     110                 :         # we've reached the end of one list, now walk to the end of the other                        
     111               3 :         while($a1<count($t1)) { $actions[]=1; $a1++; }  # deleted elements                           
     112               3 :         while($a2<count($t2)) { $actions[]=2; $a2++; }  # added elements                             
     113                 :                                                                                                      
     114                 :         # and this marks our ending point                                                            
     115               3 :         $actions[]=8;                                                                                
     116                 :                                                                                                      
     117                 :         # now, let's follow the path we just took and report the added/deleted                       
     118                 :         # elements into $out.                                                                        
     119               3 :         $op = 0;                                                                                     
     120               3 :         $x0=$x1=0; $y0=$y1=0;                                                                        
     121               3 :         $meonDiff = array();                                                                         
     122               3 :         $froDiff = array();                                                                          
     123                 :                                                                                                      
     124               3 :         foreach($actions as $act) {                                                                  
     125               3 :             if ($act==1) { $op|=$act; $x1++; continue; }                                             
     126               3 :             if ($act==2) { $op|=$act; $y1++; continue; }                                             
     127               3 :             if ($op>0) {                                                                             
     128               3 :                 $xstr = ($x1==($x0+1)) ? $x1 : ($x0+1).",$x1";                                       
     129               3 :                 $ystr = ($y1==($y0+1)) ? $y1 : ($y0+1).",$y1";                                       
     130               3 :                 while ($x0<$x1) { $meonDiff[] = $t1[$x0]; $x0++; }   # deleted elems                 
     131               3 :                 while ($y0<$y1) { $froDiff[] = $t2[$y0]; $y0++; }   # added elems                    
     132               3 :             }                                                                                        
     133               3 :             $x1++; $x0=$x1;                                                                          
     134               3 :             $y1++; $y0=$y1;                                                                          
     135               3 :             $op=0;                                                                                   
     136               3 :         }                                                                                            
     137                 :                                                                                                      
     138               3 :         return array($meonDiff, $froDiff);                                                           
     139                 :     }                                                                                                
     140                 :                                                                                                      
     141                 :     /**                                                                                              
     142                 :      * Finds the words to remove from a Meon line of text                                            
     143                 :      * and the words to add in a line of the old French text of reference                            
     144                 :      * and if the added words are also in Martin                                                     
     145                 :      *                                                                                               
     146                 :      * @param  array $row the text lines of a row
     147                 :      * @return mixed the words removed from Meon,                                                    
     148                 :      *               and the words added in the old French text of reference,                        
     149                 :      *               and if the added words are in Martin                                            
     150                 :      */                                                                                              
     151                 :     public function diffMeonFroMartinLines($row)                                                     
     152                 :     {                                                                                                
     153               2 :         $meonLine = $row[Episode::COL_MEON_TEXT_FIXED];                                              
     154               2 :         $froLine = $row[Episode::COL_FRO_TEXT];                                                      
     155               2 :         $martinLine = $row[Episode::COL_MARTIN_TEXT_ORIG];                                           
     156                 :                                                                                                      
     157               2 :         $meonWords = $this->splitLine($meonLine);                                                    
     158               2 :         $froWords = $this->splitLine($froLine);                                                      
     159               2 :         list($meonDiff, $froDiff) = $this->diffMeonFroWords($meonWords, $froWords);                  
     160                 :                                                                                                      
     161                 :         // finds if the words added in the old French text of reference are also in Martin           
     162               2 :         $inMartin = ($froDiff and $this->isFixInMartin($froDiff, $martinLine));                      
     163                 :                                                                                                      
     164               2 :         if ($meonDiff and $froDiff) {                                                                
     165               2 :             $meonRemoved = $this->implodeLine($meonWords, $meonDiff);                                
     166               2 :             $froAdded = $this->implodeLine($froWords, $froDiff);                                     
     167                 :                                                                                                      
     168               2 :         } else if (!$meonDiff and $froDiff) {                                                        
     169                 :             // words not in meon but in fro                                                          
     170               2 :             if ($meonLine) {                                                                         
     171                 :                 // meon not empty                                                                    
     172                 :                 // $meonRemoved = count($froDiff) > 1? '[absents]' : '[absent]'; // singular ok      
     173               1 :                 $meonRemoved = '[absent]';                                                           
     174               1 :                 $froAdded = $this->implodeLine($froWords, $froDiff, ',');                            
     175               1 :             } else {                                                                                 
     176                 :                 // verse not in meon but in fro                                                      
     177                 :                 // $meonRemoved = '[absent]'; // actually captured in mapping                        
     178                 :                 // $froAdded = trim($froLine, ',;.:!? '); // actually captured in mapping            
     179               2 :                 $meonRemoved = '';                                                                   
     180               2 :                 $froAdded = '';                                                                      
     181                 :             }                                                                                        
     182                 :                                                                                                      
     183               2 :         } else if ($meonDiff and !$froDiff) {                                                        
     184                 :             // words in meon but not in fro                                                          
     185               2 :             if ($froLine) {                                                                          
     186                 :                 // fro not empty                                                                     
     187                 :                 // $froAdded = count($meonDiff) > 1? '[supprimés]' : '[supprimé]' ;  // singular ok
     188               1 :                 $froAdded = '[supprimé]' ;                                                          
     189               1 :                 $meonRemoved = $this->implodeLine($meonWords, $meonDiff, ',');                       
     190               1 :             } else {                                                                                 
     191                 :                 // verse in meon but not in fro                                                      
     192                 :                 // $froAdded = '[supprimé]'; // actually captured in mapping                        
     193                 :                 // $meonRemoved = trim($meonLine, ',;.:!? '); // actually captured in mapping        
     194               2 :                 $froAdded = '';                                                                      
     195               2 :                 $meonRemoved = '';                                                                   
     196                 :             }                                                                                        
     197                 :                                                                                                      
     198               2 :         } else {                                                                                     
     199                 :             // no difference or verse in martin only                                                 
     200               2 :             $meonRemoved = '';                                                                       
     201               2 :             $froAdded = '';                                                                          
     202                 :         }                                                                                            
     203                 :                                                                                                      
     204               2 :         return array($meonRemoved, $froAdded, $inMartin);                                            
     205                 :     }                                                                                                
     206                 :                                                                                                      
     207                 :     /**                                                                                              
     208                 :      * Converts the difference into a string                                                         
     209                 :      *                                                                                               
     210                 :      * @param  array  $row         the text lines of a row
     211                 :      * @param  string $meonRemoved the words removed from Meon                                       
     212                 :      * @param  string $froAdded    the words added in the old French text of reference               
     213                 :      * @param  bool   $inMartin    the added words are in Martin if true, false otherwise            
     214                 :      * @return string the difference as a string                                                     
     215                 :      */                                                                                              
     216                 :     public function diffToString($row, $meonRemoved, $froAdded, $inMartin)                           
     217                 :     {                                                                                                
     218               2 :         if ($froAdded) {                                                                             
     219               2 :             $martinString = $inMartin? sprintf(self::MARTIN_FMT,                                     
     220               2 :                 $row[Episode::COL_MARTIN_CHAPTERS], $row[Episode::COL_MARTIN_NUMBERS]) : '';         
     221                 :                                                                                                      
     222               2 :             $difference = sprintf(self::DIFFERENCE_FMT,                                              
     223               2 :                 $row[Episode::COL_FRO_NUMBERS],                                                      
     224               2 :                 $row[Episode::COL_MEON_NUMBERS], $meonRemoved,                                       
     225               2 :                 $martinString,                                                                       
     226               2 :                 $row[Episode::COL_FRO_NUMBERS], $froAdded);                                          
     227                 :                                                                                                      
     228               2 :         } else {                                                                                     
     229               2 :             $difference = '';                                                                        
     230                 :         }                                                                                            
     231                 :                                                                                                      
     232               2 :         return $difference;                                                                          
     233                 :     }                                                                                                
     234                 :                                                                                                      
     235                 :     /**                                                                                              
     236                 :      * Finds the differences between the text of Meon and the old French text of reference           
     237                 :      *                                                                                               
     238                 :      * @return string the list of differences or a message if there are no differences               
     239                 :      */                                                                                              
     240                 :     public function findDifferences()                                                                
     241                 :     {                                                                                                
     242                 :         // reads the rows                                                                            
     243               1 :         $this->readSheet();                                                                          
     244               1 :         $rows = $this->readRows();                                                                   
     245                 :                                                                                                      
     246               1 :         $differences = array();                                                                      
     247                 :                                                                                                      
     248                 :         // finds the differences between the text lines in each row                                  
     249               1 :         foreach($rows as $row) {                                                                     
     250               1 :             list($meonRemoved, $froAdded, $inMartin) = $this->diffMeonFroMartinLines($row);          
     251               1 :             $differences[] = $this->diffToString($row, $meonRemoved, $froAdded, $inMartin);          
     252               1 :         }                                                                                            
     253                 :                                                                                                      
     254                 :         // writes the differences in another column                                                  
     255               1 :         $this->writeColumn($differences, Episode::COL_DIFFERENCES);                                  
     256               1 :         $this->writeSheet();                                                                         
     257                 :                                                                                                      
     258                 :         // reports the differences                                                                   
     259               1 :         $result = $this->arrayToString($differences) or $result = self::MSG_NO_DIFFERENCE;           
     260                 :                                                                                                      
     261               1 :         return $result;                                                                              
     262                 :     }                                                                                                
     263                 :                                                                                                      
     264                 :     /**                                                                                              
     265                 :      * Implodes a text line while hiding unchanged words                                             
     266                 :      *                                                                                               
     267                 :      * @param  array  $originalWords the original words                                              
     268                 :      * @param  array  $changedWsords the words added or removed                                      
     269                 :      * @param  string $glue          the string replacing unchanged words                            
     270                 :      * @return string the imploded text line                                                         
     271                 :      */                                                                                              
     272                 :     public function implodeLine($originalWords, $changedWsords, $glue = '...')                       
     273                 :     {                                                                                                
     274               3 :         $verse = '';                                                                                 
     275                 :                                                                                                      
     276               3 :         foreach(array_intersect($originalWords, $changedWsords) as $idx => $word) {                  
     277               3 :             if ($verse) {                                                                            
     278               2 :                 $verse .= ' ';                                                                       
     279               2 :                 $idx != ($prevIdx + 1) and $verse .= $glue . ' ';                                    
     280               2 :             }                                                                                        
     281                 :                                                                                                      
     282               3 :             $verse .= $word;                                                                         
     283               3 :             $prevIdx = $idx;                                                                         
     284               3 :         }                                                                                            
     285                 :                                                                                                      
     286               3 :         return $verse;                                                                               
     287                 :     }                                                                                                
     288                 :                                                                                                      
     289                 :     /**                                                                                              
     290                 :      * Finds if a list of words is in a Martin text line                                             
     291                 :      *                                                                                               
     292                 :      * @param  array   $words      the words to find                                                 
     293                 :      * @param  string  $martinLine the Martin text line                                              
     294                 :      * @return boolean true if all the words are found, false otherwise                              
     295                 :      */                                                                                              
     296                 :     public function isFixInMartin($words, $martinLine)                                               
     297                 :     {                                                                                                
     298               3 :         $martinWords = $this->splitLine($martinLine);                                                
     299                 :                                                                                                      
     300               3 :         return $words and $martinLine and array_intersect($words, $martinWords) == $words;           
     301                 :     }                                                                                                
     302                 :                                                                                                      
     303                 :     /**                                                                                              
     304                 :      * Splits a line of text into words                                                              
     305                 :      *                                                                                               
     306                 :      * @param  string $line the line of text to split                                                
     307                 :      * @return mixed  the words of the line of text                                                  
     308                 :      */                                                                                              
     309                 :     public function splitLine($line)                                                                 
     310                 :     {                                                                                                
     311               4 :         return preg_split('~[,;.:!? ]~', $line, -1, PREG_SPLIT_NO_EMPTY);                            
     312                 :     }                                                                                                
     313                 :                                                                                                      
     314                 :     /**                                                                                              
     315                 :      * Parses a difference                                                                           
     316                 :      *                                                                                               
     317                 :      * @param  string $difference the difference to parse                                            
     318                 :      * @return mixed  the difference details                                                         
     319                 :      */                                                                                              
     320                 :     public function toCsv($difference)                                                               
     321                 :     {                                                                                                
     322               3 :         $base = new Base;                                                                            
     323                 :                                                                                                      
     324               3 :         $pattern = $base->completePattern(self::DIFFERENCE_FMT, '(.*)');                             
     325               3 :         $difference = $base->match($pattern, $difference, self::ERR_PARSE_DIFFERENCE,                
     326               3 :             Differences::$differenceKeys, true);                                                     
     327                 :                                                                                                      
     328               3 :         return $difference;                                                                          
     329                 :     }                                                                                                

Generated by PHPUnit 3.4.11 and Xdebug 2.0.4 using PHP 5.2.11 at Tue Apr 6 13:06:37 UTC 2010.