3.4.0-rc1
[phpmyadmin/thilanka.git] / libraries / import / csv.php
blobe41ea35bffc6104ae20c5de9f965b0a57b1b9aac
1 <?php
2 /* vim: set expandtab sw=4 ts=4 sts=4: */
3 /**
4 * CSV import plugin for phpMyAdmin
6 * @todo add an option for handling NULL values
7 * @package phpMyAdmin-Import
8 */
9 if (! defined('PHPMYADMIN')) {
10 exit;
13 $analyze = false;
15 if ($plugin_param !== 'table') {
16 $analyze = true;
19 if (isset($plugin_list)) {
20 $plugin_list['csv'] = array(
21 'text' => __('CSV'),
22 'extension' => 'csv',
23 'options' => array(
24 array('type' => 'begin_group', 'name' => 'general_opts'),
25 array('type' => 'bool', 'name' => 'replace', 'text' => __('Replace table data with file')),
26 array('type' => 'bool', 'name' => 'ignore', 'text' => __('Do not abort on INSERT error')),
27 array('type' => 'text', 'name' => 'terminated', 'text' => __('Columns separated with:'), 'size' => 2, 'len' => 2),
28 array('type' => 'text', 'name' => 'enclosed', 'text' => __('Columns enclosed with:'), 'size' => 2, 'len' => 2),
29 array('type' => 'text', 'name' => 'escaped', 'text' => __('Columns escaped with:'), 'size' => 2, 'len' => 2),
30 array('type' => 'text', 'name' => 'new_line', 'text' => __('Lines terminated with:'), 'size' => 2),
32 'options_text' => __('Options'),
35 if ($plugin_param !== 'table') {
36 $plugin_list['csv']['options'][] =
37 array('type' => 'bool', 'name' => 'col_names', 'text' => __('The first line of the file contains the table column names <i>(if this is unchecked, the first line will become part of the data)</i>'));
38 } else {
39 $hint = new PMA_Message(__('If the data in each row of the file is not in the same order as in the database, list the corresponding column names here. Column names must be separated by commas and not enclosed in quotations.'));
40 $plugin_list['csv']['options'][] =
41 array('type' => 'text', 'name' => 'columns', 'text' => __('Column names: ' . PMA_showHint($hint)));
43 $plugin_list['csv']['options'][] = array('type' => 'end_group');
45 /* We do not define function when plugin is just queried for information above */
46 return;
49 $replacements = array(
50 '\\n' => "\n",
51 '\\t' => "\t",
52 '\\r' => "\r",
54 $csv_terminated = strtr($csv_terminated, $replacements);
55 $csv_enclosed = strtr($csv_enclosed, $replacements);
56 $csv_escaped = strtr($csv_escaped, $replacements);
57 $csv_new_line = strtr($csv_new_line, $replacements);
59 $param_error = FALSE;
60 if (strlen($csv_terminated) != 1) {
61 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
62 $message->addParam(__('Columns terminated by'), false);
63 $error = TRUE;
64 $param_error = TRUE;
65 // The default dialog of MS Excel when generating a CSV produces a
66 // semi-colon-separated file with no chance of specifying the
67 // enclosing character. Thus, users who want to import this file
68 // tend to remove the enclosing character on the Import dialog.
69 // I could not find a test case where having no enclosing characters
70 // confuses this script.
71 // But the parser won't work correctly with strings so we allow just
72 // one character.
73 } elseif (strlen($csv_enclosed) > 1) {
74 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
75 $message->addParam(__('Columns enclosed by'), false);
76 $error = TRUE;
77 $param_error = TRUE;
78 } elseif (strlen($csv_escaped) != 1) {
79 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
80 $message->addParam(__('Columns escaped by'), false);
81 $error = TRUE;
82 $param_error = TRUE;
83 } elseif (strlen($csv_new_line) != 1 && $csv_new_line != 'auto') {
84 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
85 $message->addParam(__('Lines terminated by'), false);
86 $error = TRUE;
87 $param_error = TRUE;
90 // If there is an error in the parameters entered, indicate that immediately.
91 if ($param_error) {
92 PMA_mysqlDie($message->getMessage(), '', '', $err_url);
95 $buffer = '';
96 $required_fields = 0;
98 if (!$analyze) {
99 if (isset($csv_replace)) {
100 $sql_template = 'REPLACE';
101 } else {
102 $sql_template = 'INSERT';
103 if (isset($csv_ignore)) {
104 $sql_template .= ' IGNORE';
107 $sql_template .= ' INTO ' . PMA_backquote($table);
109 $tmp_fields = PMA_DBI_get_fields($db, $table);
111 if (empty($csv_columns)) {
112 $fields = $tmp_fields;
113 } else {
114 $sql_template .= ' (';
115 $fields = array();
116 $tmp = preg_split('/,( ?)/', $csv_columns);
117 foreach ($tmp as $key => $val) {
118 if (count($fields) > 0) {
119 $sql_template .= ', ';
121 /* Trim also `, if user already included backquoted fields */
122 $val = trim($val, " \t\r\n\0\x0B`");
123 $found = FALSE;
124 foreach ($tmp_fields as $id => $field) {
125 if ($field['Field'] == $val) {
126 $found = TRUE;
127 break;
130 if (!$found) {
131 $message = PMA_Message::error(__('Invalid column (%s) specified! Ensure that columns names are spelled correctly, separated by commas, and not enclosed in quotes.' ));
132 $message->addParam($val);
133 $error = TRUE;
134 break;
136 $fields[] = $field;
137 $sql_template .= PMA_backquote($val);
139 $sql_template .= ') ';
142 $required_fields = count($fields);
144 $sql_template .= ' VALUES (';
147 // Defaults for parser
148 $i = 0;
149 $len = 0;
150 $line = 1;
151 $lasti = -1;
152 $values = array();
153 $csv_finish = FALSE;
155 $tempRow = array();
156 $rows = array();
157 $col_names = array();
158 $tables = array();
160 $col_count = 0;
161 $max_cols = 0;
163 while (!($finished && $i >= $len) && !$error && !$timeout_passed) {
164 $data = PMA_importGetNextChunk();
165 if ($data === FALSE) {
166 // subtract data we didn't handle yet and stop processing
167 $offset -= strlen($buffer);
168 break;
169 } elseif ($data === TRUE) {
170 // Handle rest of buffer
171 } else {
172 // Append new data to buffer
173 $buffer .= $data;
174 unset($data);
175 // Do not parse string when we're not at the end and don't have new line inside
176 if (($csv_new_line == 'auto' && strpos($buffer, "\r") === FALSE && strpos($buffer, "\n") === FALSE)
177 || ($csv_new_line != 'auto' && strpos($buffer, $csv_new_line) === FALSE)) {
178 continue;
182 // Current length of our buffer
183 $len = strlen($buffer);
184 // Currently parsed char
185 $ch = $buffer[$i];
186 while ($i < $len) {
187 // Deadlock protection
188 if ($lasti == $i && $lastlen == $len) {
189 $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
190 $message->addParam($line);
191 $error = TRUE;
192 break;
194 $lasti = $i;
195 $lastlen = $len;
197 // This can happen with auto EOL and \r at the end of buffer
198 if (!$csv_finish) {
199 // Grab empty field
200 if ($ch == $csv_terminated) {
201 if ($i == $len - 1) {
202 break;
204 $values[] = '';
205 $i++;
206 $ch = $buffer[$i];
207 continue;
210 // Grab one field
211 $fallbacki = $i;
212 if ($ch == $csv_enclosed) {
213 if ($i == $len - 1) {
214 break;
216 $need_end = TRUE;
217 $i++;
218 $ch = $buffer[$i];
219 } else {
220 $need_end = FALSE;
222 $fail = FALSE;
223 $value = '';
224 while (($need_end && $ch != $csv_enclosed)
225 || (!$need_end && !($ch == $csv_terminated
226 || $ch == $csv_new_line || ($csv_new_line == 'auto'
227 && ($ch == "\r" || $ch == "\n"))))) {
228 if ($ch == $csv_escaped) {
229 if ($i == $len - 1) {
230 $fail = TRUE;
231 break;
233 $i++;
234 $ch = $buffer[$i];
236 $value .= $ch;
237 if ($i == $len - 1) {
238 if (!$finished) {
239 $fail = TRUE;
241 break;
243 $i++;
244 $ch = $buffer[$i];
247 // unquoted NULL string
248 if (false === $need_end && $value === 'NULL') {
249 $value = null;
252 if ($fail) {
253 $i = $fallbacki;
254 $ch = $buffer[$i];
255 break;
257 // Need to strip trailing enclosing char?
258 if ($need_end && $ch == $csv_enclosed) {
259 if ($finished && $i == $len - 1) {
260 $ch = NULL;
261 } elseif ($i == $len - 1) {
262 $i = $fallbacki;
263 $ch = $buffer[$i];
264 break;
265 } else {
266 $i++;
267 $ch = $buffer[$i];
270 // Are we at the end?
271 if ($ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n")) || ($finished && $i == $len - 1)) {
272 $csv_finish = TRUE;
274 // Go to next char
275 if ($ch == $csv_terminated) {
276 if ($i == $len - 1) {
277 $i = $fallbacki;
278 $ch = $buffer[$i];
279 break;
281 $i++;
282 $ch = $buffer[$i];
284 // If everything went okay, store value
285 $values[] = $value;
288 // End of line
289 if ($csv_finish || $ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))) {
290 if ($csv_new_line == 'auto' && $ch == "\r") { // Handle "\r\n"
291 if ($i >= ($len - 2) && !$finished) {
292 break; // We need more data to decide new line
294 if ($buffer[$i + 1] == "\n") {
295 $i++;
298 // We didn't parse value till the end of line, so there was empty one
299 if (!$csv_finish) {
300 $values[] = '';
303 if ($analyze) {
304 foreach ($values as $ley => $val) {
305 $tempRow[] = $val;
306 ++$col_count;
309 if ($col_count > $max_cols) {
310 $max_cols = $col_count;
312 $col_count = 0;
314 $rows[] = $tempRow;
315 $tempRow = array();
316 } else {
317 // Do we have correct count of values?
318 if (count($values) != $required_fields) {
320 // Hack for excel
321 if ($values[count($values) - 1] == ';') {
322 unset($values[count($values) - 1]);
323 } else {
324 $message = PMA_Message::error(__('Invalid column count in CSV input on line %d.'));
325 $message->addParam($line);
326 $error = TRUE;
327 break;
331 $first = TRUE;
332 $sql = $sql_template;
333 foreach ($values as $key => $val) {
334 if (!$first) {
335 $sql .= ', ';
337 if ($val === null) {
338 $sql .= 'NULL';
339 } else {
340 $sql .= '\'' . addslashes($val) . '\'';
343 $first = FALSE;
345 $sql .= ')';
348 * @todo maybe we could add original line to verbose SQL in comment
350 PMA_importRunQuery($sql, $sql);
353 $line++;
354 $csv_finish = FALSE;
355 $values = array();
356 $buffer = substr($buffer, $i + 1);
357 $len = strlen($buffer);
358 $i = 0;
359 $lasti = -1;
360 $ch = $buffer[0];
362 } // End of parser loop
363 } // End of import loop
365 if ($analyze) {
366 /* Fill out all rows */
367 $num_rows = count($rows);
368 for ($i = 0; $i < $num_rows; ++$i) {
369 for ($j = count($rows[$i]); $j < $max_cols; ++$j) {
370 $rows[$i][] = 'NULL';
374 if (isset($_REQUEST['csv_col_names'])) {
375 $col_names = array_splice($rows, 0, 1);
376 $col_names = $col_names[0];
379 if ((isset($col_names) && count($col_names) != $max_cols) || !isset($col_names)) {
380 // Fill out column names
381 for ($i = 0; $i < $max_cols; ++$i) {
382 $col_names[] = 'COL '.($i+1);
386 if (strlen($db)) {
387 $result = PMA_DBI_fetch_result('SHOW TABLES');
388 $tbl_name = 'TABLE '.(count($result) + 1);
389 } else {
390 $tbl_name = 'TBL_NAME';
393 $tables[] = array($tbl_name, $col_names, $rows);
395 /* Obtain the best-fit MySQL types for each column */
396 $analyses = array();
397 $analyses[] = PMA_analyzeTable($tables[0]);
400 * string $db_name (no backquotes)
402 * array $table = array(table_name, array() column_names, array()() rows)
403 * array $tables = array of "$table"s
405 * array $analysis = array(array() column_types, array() column_sizes)
406 * array $analyses = array of "$analysis"s
408 * array $create = array of SQL strings
410 * array $options = an associative array of options
413 /* Set database name to the currently selected one, if applicable */
414 if (strlen($db)) {
415 $db_name = $db;
416 $options = array('create_db' => false);
417 } else {
418 $db_name = 'CSV_DB';
419 $options = NULL;
422 /* Non-applicable parameters */
423 $create = NULL;
425 /* Created and execute necessary SQL statements from data */
426 PMA_buildSQL($db_name, $tables, $analyses, $create, $options);
428 unset($tables);
429 unset($analyses);
432 // Commit any possible data in buffers
433 PMA_importRunQuery();
435 if (count($values) != 0 && !$error) {
436 $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
437 $message->addParam($line);
438 $error = TRUE;