1 : <?php
2 :
3 : require_once 'Common/String.php';
4 :
5 : abstract class Model_Parser
6 : {
// batch file template shared by all dictionaries, and the temporary
// batch file that createBatchFile() generates from it
const BATCH_FILE_TEMPLATE = '../data/word.sql';
const BATCH_FILE_TEMP = '../data/temp.sql';

// default path patterns: the constructor replaces %s with $dictionary
const BATCH_FILE_DEFAULT = '../data/%s/word.sql';
const DATABASE_DEFAULT = '../data/%s/dictionary.sqlite';
const DATA_FILE_DEFAULT = '../data/%s/word.txt';
const ERROR_FILE_DEFAULT = '../data/%s/error.txt';

// value inserted into the default path patterns and into the batch template
// (not assigned in this class; presumably set by subclasses -- confirm)
public $dictionary;

// batch file path(s) executed by import(); cast to an array by the constructor
public $batchFile;
// database file handed to sqlite3 by import()
public $dataBase;
// data file path(s) written by write(); cast to an array by the constructor
public $dataFile;
// file that receives the collected messages when the object is destroyed
public $errorFile;
// file read by read() and named in the messages built by error()
public $sourceFile;
// when true, error() prints warnings as well as errors
public $verbose;

// messages collected by error() while an error file is configured
public $error;
// Common_String instance created by the constructor
public $string;
26 :
/**
 * Sets up the parser: stores the verbosity flag, gives every path that is
 * still unset its default value and creates the string helper.
 *
 * @param bool $verbose stored (cast to bool) in $this->verbose
 */
public function __construct($verbose = false)
{
    $this->verbose = (bool)$verbose;

    // a path that already holds a truthy value is kept, any other one gets
    // its default pattern with %s replaced by $this->dictionary
    $patterns = array(
        'batchFile' => self::BATCH_FILE_DEFAULT,
        'dataBase'  => self::DATABASE_DEFAULT,
        'dataFile'  => self::DATA_FILE_DEFAULT,
        'errorFile' => self::ERROR_FILE_DEFAULT,
    );
    foreach ($patterns as $property => $pattern) {
        if (!$this->$property) {
            $this->$property = sprintf($pattern, $this->dictionary);
        }
    }

    // import(), parse() and write() iterate over / index these two as arrays
    $this->batchFile = (array)$this->batchFile;
    $this->dataFile = (array)$this->dataFile;

    $this->string = new Common_String();
}
41 :
/**
 * Writes the messages collected by error() to the error file.
 *
 * Nothing is written unless an error file is configured and at least one
 * message has been collected. The write is best effort: a failure is only
 * reported on screen, as "failed" instead of "done".
 */
public function __destruct()
{
    if (!$this->errorFile or !$this->error) {
        return;
    }

    print "writing {$this->errorFile} ... ";

    // warnings are suppressed, so the return value is the only failure signal
    $written = @file_put_contents($this->errorFile, implode('', $this->error));

    print $written === false ? "failed" : "done";
}
52 :
/**
 * Runs the whole pipeline: pre-processing, reading and parsing the source
 * file, post-processing, writing the data files and importing them.
 *
 * @param int|null $lineStart passed on to read()
 * @param int|null $lineCount passed on to read()
 */
public function create($lineStart = null, $lineCount = null)
{
    $this->preProcessing();

    // source file -> parsed data
    $slice = $this->read($lineStart, $lineCount);
    $lines = $slice[0];
    $lineStart = $slice[1];
    $parsed = $this->parse($lines, $lineStart);
    $data = $this->postProcessing($parsed);

    // parsed data -> data files -> database
    $this->write($data);
    $this->import();
}
64 :
/**
 * Generates the temporary batch file from the batch file template, using
 * the template as a sprintf() format with $this->dictionary as argument.
 *
 * A template that cannot be read (or is empty) and a failed write are both
 * reported through error() as errors.
 */
public function createBatchFile()
{
    $template = file_get_contents(self::BATCH_FILE_TEMPLATE);
    if (!$template) {
        $this->error("cannot read " . self::BATCH_FILE_TEMPLATE, true);
    }

    $content = sprintf($template, $this->dictionary);

    $written = file_put_contents(self::BATCH_FILE_TEMP, $content);
    if (!$written) {
        $this->error("cannot write " . self::BATCH_FILE_TEMP, true);
    }
}
75 :
/**
 * Reports an error or a warning.
 *
 * The message is printed when it is an error or when verbosity is requested
 * (by the $verbose argument or by $this->verbose). It is also collected in
 * $this->error while an error file is configured. An error terminates the
 * script with exit status 1.
 *
 * @param string   $message    text of the message
 * @param bool     $isError    true for an error, false for a warning
 * @param int|null $lineNumber source line number to mention, if any
 * @param bool     $verbose    forces a warning to be printed
 */
public function error($message, $isError, $lineNumber = null, $verbose = false)
{
    $label = $isError ? 'Error' : 'Warning';
    $location = is_null($lineNumber) ? '' : "({$this->sourceFile} #$lineNumber) ";
    $text = "\n$label! " . $location . "$message\n";

    if ($isError or $verbose or $this->verbose) {
        print $text;
    }

    if (!empty($this->errorFile)) {
        $this->error[] = $text;
    }

    if ($isError) {
        exit(1);
    }
}
90 :
/**
 * Imports the data into the database by feeding each batch file to the
 * sqlite3 command line tool.
 *
 * A batch file that does not exist is replaced by a temporary one generated
 * with createBatchFile(), which is deleted once it has been executed. The
 * last line printed by the command must be numeric; it is reported as the
 * number of imported lines. A non-zero exit status or a non-numeric last
 * line is reported through error() as an error.
 *
 * NOTE(review): the paths are inserted into the shell command unescaped, so
 * a path containing spaces or shell metacharacters breaks the command.
 */
public function import()
{
    // creates the data base
    print "creating database {$this->dataBase} ... \n";

    // "Darwin" (macOS) contains "win" as well and must not be taken for Windows
    $isWindows = stripos(PHP_OS, 'win') !== false && stripos(PHP_OS, 'darwin') === false;

    foreach ($this->batchFile as $name) {
        print "reading $name ... ";

        $isBatchFile = file_exists($name);
        if (!$isBatchFile) {
            $this->createBatchFile();
            $name = self::BATCH_FILE_TEMP;
        }

        $command = "echo .read $name | sqlite3 {$this->dataBase}";
        if ($isWindows) {
            // strips the drive letters and uses forward slashes in the paths
            $command = preg_replace('~\w:~', '', $command);
            $command = str_replace('\\', '/', $command);
        }

        // exec() appends to the array it is given: starts afresh for each file
        $output = array();
        $lineCount = exec($command, $output, $returnVar);
        if ($returnVar) {
            $this->error("cannot execute $name (error: $returnVar)", true);
        }

        if (!is_numeric($lineCount)) {
            $this->error("cannot import via $name (error: $lineCount)", true);
        }

        print "$lineCount lines imported\n";

        if (!$isBatchFile) {
            unlink(self::BATCH_FILE_TEMP);
        }
    }
}
120 :
/**
 * Default implementation: always returns false, whatever the line.
 *
 * NOTE(review): no caller is visible in this file; presumably a hook for
 * subclasses to signal the end of the data -- confirm.
 *
 * @param string $line not used by this implementation
 * @return bool false
 */
public function isEndOfData($line)
{
    return false;
}
125 :
/**
 * Default implementation: always returns false, whatever the line.
 *
 * NOTE(review): no caller is visible in this file; presumably a hook for
 * subclasses to skip some lines -- confirm.
 *
 * @param string $line not used by this implementation
 * @return bool false
 */
public function isLineIgnored($line)
{
    return false;
}
130 :
/**
 * Parses the lines with parseLine() and gathers the results per data file.
 *
 * @param array $lines      lines to parse
 * @param int   $lineNumber number of the first line, incremented for each
 *                          line and passed to parseLine()
 * @return array one string per key of $this->dataFile, made of the non-empty
 *               strings returned by parseLine(), each followed by a new line
 */
public function parse($lines, $lineNumber)
{
    // parses the dictionary
    print "parsing {$this->sourceFile} ";

    // one buffer, initially empty, per data file
    $data = array();
    foreach (array_keys($this->dataFile) as $key) {
        $data[$key] = '';
    }

    foreach ($lines as $line) {
        // parseLine() is expected to always return an array
        $parsed = $this->parseLine($line, $lineNumber);

        foreach ($parsed as $name => $string) {
            // empty() also discards the string '0'
            if (!empty($string)) {
                $data[$name] .= $string . "\n";
            }
        }

        $lineNumber++;
        // progress indicator: one dot each time the counter reaches a multiple of 1000
        if ($lineNumber % 1000 === 0) {
            print '.';
        }
    }

    print ' ' . count($lines) . " lines parsed\n";

    return $data;
}
155 :
/**
 * Parses one line of the source file; called by parse() for each line.
 *
 * @param string $line       line to parse
 * @param int    $lineNumber number of the line as counted by parse()
 * @return array strings to add to the data, indexed by data file key;
 *               parse() iterates over the result, so it must always be an
 *               array, and empty entries are ignored
 */
abstract public function parseLine($line, $lineNumber);
157 :
/**
 * Called by create() before the source file is read.
 *
 * Does nothing in this class.
 */
public function preProcessing()
{
}
161 :
/**
 * Called by create() with the result of parse(), before the data is written.
 *
 * Returns the data unchanged in this class.
 *
 * @param array $data parsed data
 * @return array the data passed in
 */
public function postProcessing($data)
{
    return $data;
}
166 :
/**
 * Reads the source file into an array of lines, optionally restricted to a
 * slice of it (the slicing is used only for debugging purposes).
 *
 * A file that cannot be read or is empty is reported through error() as an
 * error.
 *
 * @param int|null $lineStart number (1-based) of the first line to keep,
 *                            the first line of the file when empty
 * @param int|null $lineCount number of lines to keep, all the lines from
 *                            $lineStart up to the end of the file when empty
 * @return array array($lines, $lineStart)
 */
public function read($lineStart = null, $lineCount = null)
{
    // reads the dictionary
    print "reading {$this->sourceFile} ... ";

    $lines = @file($this->sourceFile) or
        $this->error("cannot read or empty file {$this->sourceFile}", true);
    print count($lines) . " lines read\n";

    empty($lineStart) and $lineStart = 1;
    $isLimited = !empty($lineCount);

    if ($lineStart !== 1 or $isLimited) {
        // slices the dictionary (used only for debugging purposes)
        print "slicing {$this->sourceFile} ... ";

        // without a line count the slice runs up to the end of the file,
        // whatever its length, instead of being capped at a fixed number of lines
        $lines = $isLimited
            ? array_slice($lines, $lineStart - 1, $lineCount)
            : array_slice($lines, $lineStart - 1);

        print count($lines) . " lines sliced\n";
    }

    return array($lines, $lineStart);
}
188 :
// word passed to the previous call of validateWordOrder() on this object,
// null before the first call; kept per object so that several parsers
// used in the same script do not compare their words with each other
private $previousWord = null;

/**
 * Checks that the word is not lower than the word of the previous call.
 *
 * Validating the word order helps spot invalid entries where entries are
 * expected to be sorted in the source file, ex. gdf like Txt files.
 * It should not be used for dictionaries that are not sorted, ex. ghostwords.
 *
 * A word that is out of order is reported through error() as an error.
 *
 * @param string $word       word to validate
 * @param int    $lineNumber number of the line mentioned in the error message
 */
public function validateWordOrder($word, $lineNumber)
{
    $prevWord = $this->previousWord;

    if (!is_null($prevWord) and !($prevWord <= $word)) {
        $this->error("bad word order: $prevWord > $word", true, $lineNumber);
    }

    $this->previousWord = $word;
}
202 :
/**
 * Writes each entry of the data into its data file.
 *
 * A file that cannot be written is reported through error() as an error.
 *
 * @param array $data content of the data files, indexed by the keys of
 *                    $this->dataFile
 */
public function write($data)
{
    // writes the dictionary data file
    foreach ($data as $name => $string) {
        $file = $this->dataFile[$name];

        print "writing data file $file ... ";

        // file_put_contents() returns 0 when it successfully writes an empty
        // string: only false is a failure
        if (@file_put_contents($file, $string) === false) {
            $this->error("cannot write file $file", true);
        }

        print "done\n";
    }
}
|