Advisor: mark that 'Rate of reading fixed position' may be wrong, requires further...
[phpmyadmin/thilanka.git] / libraries / import / csv.php
bloba0b3348a8cfe87c336cc3ac07047901b55a6a235
1 <?php
2 /* vim: set expandtab sw=4 ts=4 sts=4: */
3 /**
4 * CSV import plugin for phpMyAdmin
6 * @todo add an option for handling NULL values
7 * @package phpMyAdmin-Import
8 * @subpackage CSV
9 */
10 if (! defined('PHPMYADMIN')) {
11 exit;
14 $analyze = false;
16 if ($plugin_param !== 'table') {
17 $analyze = true;
20 if (isset($plugin_list)) {
21 $plugin_list['csv'] = array(
22 'text' => __('CSV'),
23 'extension' => 'csv',
24 'options' => array(
25 array('type' => 'begin_group', 'name' => 'general_opts'),
26 array('type' => 'bool', 'name' => 'replace', 'text' => __('Replace table data with file')),
27 array('type' => 'bool', 'name' => 'ignore', 'text' => __('Do not abort on INSERT error')),
28 array('type' => 'text', 'name' => 'terminated', 'text' => __('Columns separated with:'), 'size' => 2, 'len' => 2),
29 array('type' => 'text', 'name' => 'enclosed', 'text' => __('Columns enclosed with:'), 'size' => 2, 'len' => 2),
30 array('type' => 'text', 'name' => 'escaped', 'text' => __('Columns escaped with:'), 'size' => 2, 'len' => 2),
31 array('type' => 'text', 'name' => 'new_line', 'text' => __('Lines terminated with:'), 'size' => 2),
33 'options_text' => __('Options'),
36 if ($plugin_param !== 'table') {
37 $plugin_list['csv']['options'][] =
38 array('type' => 'bool', 'name' => 'col_names', 'text' => __('The first line of the file contains the table column names <i>(if this is unchecked, the first line will become part of the data)</i>'));
39 } else {
40 $hint = new PMA_Message(__('If the data in each row of the file is not in the same order as in the database, list the corresponding column names here. Column names must be separated by commas and not enclosed in quotations.'));
41 $plugin_list['csv']['options'][] =
42 array('type' => 'text', 'name' => 'columns', 'text' => __('Column names: ') . PMA_showHint($hint));
44 $plugin_list['csv']['options'][] = array('type' => 'end_group');
46 /* We do not define function when plugin is just queried for information above */
47 return;
50 $replacements = array(
51 '\\n' => "\n",
52 '\\t' => "\t",
53 '\\r' => "\r",
55 $csv_terminated = strtr($csv_terminated, $replacements);
56 $csv_enclosed = strtr($csv_enclosed, $replacements);
57 $csv_escaped = strtr($csv_escaped, $replacements);
58 $csv_new_line = strtr($csv_new_line, $replacements);
60 $param_error = false;
61 if (strlen($csv_terminated) != 1) {
62 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
63 $message->addParam(__('Columns terminated by'), false);
64 $error = true;
65 $param_error = true;
66 // The default dialog of MS Excel when generating a CSV produces a
67 // semi-colon-separated file with no chance of specifying the
68 // enclosing character. Thus, users who want to import this file
69 // tend to remove the enclosing character on the Import dialog.
70 // I could not find a test case where having no enclosing characters
71 // confuses this script.
72 // But the parser won't work correctly with strings so we allow just
73 // one character.
74 } elseif (strlen($csv_enclosed) > 1) {
75 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
76 $message->addParam(__('Columns enclosed by'), false);
77 $error = true;
78 $param_error = true;
79 } elseif (strlen($csv_escaped) != 1) {
80 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
81 $message->addParam(__('Columns escaped by'), false);
82 $error = true;
83 $param_error = true;
84 } elseif (strlen($csv_new_line) != 1 && $csv_new_line != 'auto') {
85 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
86 $message->addParam(__('Lines terminated by'), false);
87 $error = true;
88 $param_error = true;
91 // If there is an error in the parameters entered, indicate that immediately.
92 if ($param_error) {
93 PMA_mysqlDie($message->getMessage(), '', '', $err_url);
96 $buffer = '';
97 $required_fields = 0;
99 if (!$analyze) {
100 if (isset($csv_replace)) {
101 $sql_template = 'REPLACE';
102 } else {
103 $sql_template = 'INSERT';
104 if (isset($csv_ignore)) {
105 $sql_template .= ' IGNORE';
108 $sql_template .= ' INTO ' . PMA_backquote($table);
110 $tmp_fields = PMA_DBI_get_columns($db, $table);
112 if (empty($csv_columns)) {
113 $fields = $tmp_fields;
114 } else {
115 $sql_template .= ' (';
116 $fields = array();
117 $tmp = preg_split('/,( ?)/', $csv_columns);
118 foreach ($tmp as $key => $val) {
119 if (count($fields) > 0) {
120 $sql_template .= ', ';
122 /* Trim also `, if user already included backquoted fields */
123 $val = trim($val, " \t\r\n\0\x0B`");
124 $found = false;
125 foreach ($tmp_fields as $id => $field) {
126 if ($field['Field'] == $val) {
127 $found = true;
128 break;
131 if (!$found) {
132 $message = PMA_Message::error(__('Invalid column (%s) specified! Ensure that columns names are spelled correctly, separated by commas, and not enclosed in quotes.' ));
133 $message->addParam($val);
134 $error = true;
135 break;
137 $fields[] = $field;
138 $sql_template .= PMA_backquote($val);
140 $sql_template .= ') ';
143 $required_fields = count($fields);
145 $sql_template .= ' VALUES (';
148 // Defaults for parser
149 $i = 0;
150 $len = 0;
151 $line = 1;
152 $lasti = -1;
153 $values = array();
154 $csv_finish = false;
156 $tempRow = array();
157 $rows = array();
158 $col_names = array();
159 $tables = array();
161 $col_count = 0;
162 $max_cols = 0;
164 while (!($finished && $i >= $len) && !$error && !$timeout_passed) {
165 $data = PMA_importGetNextChunk();
166 if ($data === false) {
167 // subtract data we didn't handle yet and stop processing
168 $offset -= strlen($buffer);
169 break;
170 } elseif ($data === true) {
171 // Handle rest of buffer
172 } else {
173 // Append new data to buffer
174 $buffer .= $data;
175 unset($data);
176 // Do not parse string when we're not at the end and don't have new line inside
177 if (($csv_new_line == 'auto' && strpos($buffer, "\r") === false && strpos($buffer, "\n") === false)
178 || ($csv_new_line != 'auto' && strpos($buffer, $csv_new_line) === false)) {
179 continue;
183 // Current length of our buffer
184 $len = strlen($buffer);
185 // Currently parsed char
186 $ch = $buffer[$i];
187 while ($i < $len) {
188 // Deadlock protection
189 if ($lasti == $i && $lastlen == $len) {
190 $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
191 $message->addParam($line);
192 $error = true;
193 break;
195 $lasti = $i;
196 $lastlen = $len;
198 // This can happen with auto EOL and \r at the end of buffer
199 if (!$csv_finish) {
200 // Grab empty field
201 if ($ch == $csv_terminated) {
202 if ($i == $len - 1) {
203 break;
205 $values[] = '';
206 $i++;
207 $ch = $buffer[$i];
208 continue;
211 // Grab one field
212 $fallbacki = $i;
213 if ($ch == $csv_enclosed) {
214 if ($i == $len - 1) {
215 break;
217 $need_end = true;
218 $i++;
219 $ch = $buffer[$i];
220 } else {
221 $need_end = false;
223 $fail = false;
224 $value = '';
225 while (($need_end && $ch != $csv_enclosed)
226 || (!$need_end && !($ch == $csv_terminated
227 || $ch == $csv_new_line || ($csv_new_line == 'auto'
228 && ($ch == "\r" || $ch == "\n"))))) {
229 if ($ch == $csv_escaped) {
230 if ($i == $len - 1) {
231 $fail = true;
232 break;
234 $i++;
235 $ch = $buffer[$i];
237 $value .= $ch;
238 if ($i == $len - 1) {
239 if (!$finished) {
240 $fail = true;
242 break;
244 $i++;
245 $ch = $buffer[$i];
248 // unquoted NULL string
249 if (false === $need_end && $value === 'NULL') {
250 $value = null;
253 if ($fail) {
254 $i = $fallbacki;
255 $ch = $buffer[$i];
256 break;
258 // Need to strip trailing enclosing char?
259 if ($need_end && $ch == $csv_enclosed) {
260 if ($finished && $i == $len - 1) {
261 $ch = null;
262 } elseif ($i == $len - 1) {
263 $i = $fallbacki;
264 $ch = $buffer[$i];
265 break;
266 } else {
267 $i++;
268 $ch = $buffer[$i];
271 // Are we at the end?
272 if ($ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n")) || ($finished && $i == $len - 1)) {
273 $csv_finish = true;
275 // Go to next char
276 if ($ch == $csv_terminated) {
277 if ($i == $len - 1) {
278 $i = $fallbacki;
279 $ch = $buffer[$i];
280 break;
282 $i++;
283 $ch = $buffer[$i];
285 // If everything went okay, store value
286 $values[] = $value;
289 // End of line
290 if ($csv_finish || $ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))) {
291 if ($csv_new_line == 'auto' && $ch == "\r") { // Handle "\r\n"
292 if ($i >= ($len - 2) && !$finished) {
293 break; // We need more data to decide new line
295 if ($buffer[$i + 1] == "\n") {
296 $i++;
299 // We didn't parse value till the end of line, so there was empty one
300 if (!$csv_finish) {
301 $values[] = '';
304 if ($analyze) {
305 foreach ($values as $ley => $val) {
306 $tempRow[] = $val;
307 ++$col_count;
310 if ($col_count > $max_cols) {
311 $max_cols = $col_count;
313 $col_count = 0;
315 $rows[] = $tempRow;
316 $tempRow = array();
317 } else {
318 // Do we have correct count of values?
319 if (count($values) != $required_fields) {
321 // Hack for excel
322 if ($values[count($values) - 1] == ';') {
323 unset($values[count($values) - 1]);
324 } else {
325 $message = PMA_Message::error(__('Invalid column count in CSV input on line %d.'));
326 $message->addParam($line);
327 $error = true;
328 break;
332 $first = true;
333 $sql = $sql_template;
334 foreach ($values as $key => $val) {
335 if (!$first) {
336 $sql .= ', ';
338 if ($val === null) {
339 $sql .= 'NULL';
340 } else {
341 $sql .= '\'' . PMA_sqlAddSlashes($val) . '\'';
344 $first = false;
346 $sql .= ')';
349 * @todo maybe we could add original line to verbose SQL in comment
351 PMA_importRunQuery($sql, $sql);
354 $line++;
355 $csv_finish = false;
356 $values = array();
357 $buffer = substr($buffer, $i + 1);
358 $len = strlen($buffer);
359 $i = 0;
360 $lasti = -1;
361 $ch = $buffer[0];
363 } // End of parser loop
364 } // End of import loop
366 if ($analyze) {
367 /* Fill out all rows */
368 $num_rows = count($rows);
369 for ($i = 0; $i < $num_rows; ++$i) {
370 for ($j = count($rows[$i]); $j < $max_cols; ++$j) {
371 $rows[$i][] = 'NULL';
375 if (isset($_REQUEST['csv_col_names'])) {
376 $col_names = array_splice($rows, 0, 1);
377 $col_names = $col_names[0];
380 if ((isset($col_names) && count($col_names) != $max_cols) || !isset($col_names)) {
381 // Fill out column names
382 for ($i = 0; $i < $max_cols; ++$i) {
383 $col_names[] = 'COL '.($i+1);
387 if (strlen($db)) {
388 $result = PMA_DBI_fetch_result('SHOW TABLES');
389 $tbl_name = 'TABLE '.(count($result) + 1);
390 } else {
391 $tbl_name = 'TBL_NAME';
394 $tables[] = array($tbl_name, $col_names, $rows);
396 /* Obtain the best-fit MySQL types for each column */
397 $analyses = array();
398 $analyses[] = PMA_analyzeTable($tables[0]);
401 * string $db_name (no backquotes)
403 * array $table = array(table_name, array() column_names, array()() rows)
404 * array $tables = array of "$table"s
406 * array $analysis = array(array() column_types, array() column_sizes)
407 * array $analyses = array of "$analysis"s
409 * array $create = array of SQL strings
411 * array $options = an associative array of options
414 /* Set database name to the currently selected one, if applicable */
415 if (strlen($db)) {
416 $db_name = $db;
417 $options = array('create_db' => false);
418 } else {
419 $db_name = 'CSV_DB';
420 $options = null;
423 /* Non-applicable parameters */
424 $create = null;
426 /* Created and execute necessary SQL statements from data */
427 PMA_buildSQL($db_name, $tables, $analyses, $create, $options);
429 unset($tables);
430 unset($analyses);
433 // Commit any possible data in buffers
434 PMA_importRunQuery();
436 if (count($values) != 0 && !$error) {
437 $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
438 $message->addParam($line);
439 $error = true;