Merge branch 'QA_3_3'
[phpmyadmin/dkf.git] / libraries / import / csv.php
blobc6aebdf72911526f1fc08487c650ce2665d22cfa
1 <?php
2 /* vim: set expandtab sw=4 ts=4 sts=4: */
3 /**
4 * CSV import plugin for phpMyAdmin
6 * @todo add an option for handling NULL values
7 * @version $Id$
8 * @package phpMyAdmin-Import
9 */
/*
 * Refuse to run when the PHPMYADMIN constant is not defined, i.e. when the
 * file is requested directly instead of being included by the application.
 */
if (! defined('PHPMYADMIN')) {
    exit;
}

/*
 * $analyze selects the import mode used further down in this file:
 *   - false: rows are turned into INSERT/REPLACE statements for $table;
 *   - true : rows are collected and handed to PMA_analyzeTable() /
 *            PMA_buildSQL().
 * It is true for every $plugin_param other than the string 'table'.
 */
$analyze = ($plugin_param !== 'table');
/*
 * Plugin registration.
 *
 * When $plugin_list exists the file has only been included to describe the
 * plugin: the 'csv' entry is filled in and the include returns before any
 * import work is done.
 */
if (isset($plugin_list)) {
    $plugin_list['csv'] = array(
        'text'         => 'strCSV',
        'extension'    => 'csv',
        'options'      => array(
            array('type' => 'bool', 'name' => 'replace', 'text' => 'strReplaceTable'),
            array('type' => 'bool', 'name' => 'ignore', 'text' => 'strIgnoreDuplicates'),
            array('type' => 'text', 'name' => 'terminated', 'text' => 'strFieldsTerminatedBy', 'size' => 2, 'len' => 2),
            array('type' => 'text', 'name' => 'enclosed', 'text' => 'strFieldsEnclosedBy', 'size' => 2, 'len' => 2),
            array('type' => 'text', 'name' => 'escaped', 'text' => 'strFieldsEscapedBy', 'size' => 2, 'len' => 2),
            array('type' => 'text', 'name' => 'new_line', 'text' => 'strLinesTerminatedBy', 'size' => 2),
        ),
        'options_text' => 'strOptions',
    );

    // One extra, mode-dependent option is appended last: a checkbox for
    // "first row holds column names" outside table mode, or a text field
    // listing the target columns in table mode.
    $plugin_list['csv']['options'][] = ($plugin_param !== 'table')
        ? array('type' => 'bool', 'name' => 'col_names', 'text' => 'strImportColNames')
        : array('type' => 'text', 'name' => 'columns', 'text' => 'strColumnNames');

    /* Nothing below must run when the plugin is only queried for information */
    return;
}
/*
 * The delimiter options may contain the two-character sequences \n, \t and
 * \r (a literal backslash followed by a letter); turn them into the control
 * characters they stand for.
 */
$replacements = array(
    '\\n' => "\n",
    '\\t' => "\t",
    '\\r' => "\r",
);
$csv_terminated = strtr($csv_terminated, $replacements);
$csv_enclosed   = strtr($csv_enclosed, $replacements);
$csv_escaped    = strtr($csv_escaped, $replacements);
$csv_new_line   = strtr($csv_new_line, $replacements);

/*
 * Validate the delimiters. Only the first offending option is reported.
 *
 *  - field terminator : exactly one character;
 *  - enclosing char   : at most one character. It may be empty: the default
 *    dialog of MS Excel produces a semi-colon-separated file with no way of
 *    specifying an enclosing character, so users tend to clear this field
 *    on the Import dialog. The parser cannot handle multi-character
 *    strings, hence the upper bound of one;
 *  - escape char      : exactly one character;
 *  - line terminator  : exactly one character, or the keyword 'auto'.
 */
$csv_invalid_option_label = null;
if (strlen($csv_terminated) != 1) {
    $csv_invalid_option_label = 'strFieldsTerminatedBy';
} elseif (strlen($csv_enclosed) > 1) {
    $csv_invalid_option_label = 'strFieldsEnclosedBy';
} elseif (strlen($csv_escaped) != 1) {
    $csv_invalid_option_label = 'strFieldsEscapedBy';
} elseif (strlen($csv_new_line) != 1 && $csv_new_line != 'auto') {
    $csv_invalid_option_label = 'strLinesTerminatedBy';
}

if ($csv_invalid_option_label !== null) {
    $message = PMA_Message::error('strInvalidCSVParameter');
    $message->addParam($csv_invalid_option_label, false);
    $error = TRUE;
}
// Scratch variable only; do not leave it behind in the including scope.
unset($csv_invalid_option_label);
// Unparsed input carried over between chunks
$buffer = '';
// Number of values every CSV line must supply (table mode only)
$required_fields = 0;

/*
 * Table mode: prepare the constant head of every statement,
 *   REPLACE|INSERT [IGNORE] INTO `table` [(`col`, ...)] VALUES (
 * and work out how many values each line has to deliver.
 */
if (!$analyze) {
    if (isset($csv_replace)) {
        $sql_template = 'REPLACE';
    } else {
        $sql_template = 'INSERT';
        if (isset($csv_ignore)) {
            $sql_template .= ' IGNORE';
        }
    }
    $sql_template .= ' INTO ' . PMA_backquote($table);

    $tmp_fields = PMA_DBI_get_fields($db, $table);

    if (empty($csv_columns)) {
        // No explicit column list: every column of the table is expected
        $fields = $tmp_fields;
    } else {
        $sql_template .= ' (';
        $fields = array();
        $tmp    = preg_split('/,( ?)/', $csv_columns);
        foreach ($tmp as $val) {
            if (count($fields) > 0) {
                $sql_template .= ', ';
            }
            /* Trim also `, if user already included backquoted fields */
            $val = trim($val, " \t\r\n\0\x0B`");
            $found = FALSE;
            foreach ($tmp_fields as $field) {
                // Strict comparison on purpose: with == two numeric-looking
                // names such as '1e3' and '1000' would be treated as the
                // same column.
                if ($field['Field'] === $val) {
                    $found = TRUE;
                    break;
                }
            }
            if (!$found) {
                $message = PMA_Message::error('strInvalidColumn');
                $message->addParam($val);
                $error = TRUE;
                break;
            }
            // $field still holds the matching entry because the inner loop
            // was left with break.
            $fields[] = $field;
            $sql_template .= PMA_backquote($val);
        }
        $sql_template .= ') ';
    }

    $required_fields = count($fields);

    $sql_template .= ' VALUES (';
}
// Defaults for parser
$i = 0;             // read position inside $buffer
$len = 0;           // current length of $buffer
$lastlen = 0;       // buffer length seen by the previous parser iteration
$line = 1;          // current CSV line, used in error messages
$lasti = -1;        // read position seen by the previous parser iteration
$values = array();  // fields collected for the line being parsed
$csv_finish = FALSE; // TRUE once the last field of the current line is stored

// State used only when analysing (non-table mode)
$tempRow = array();
$rows = array();
$col_names = array();
$tables = array();

$col_count = 0;
$max_cols = 0;

/*
 * Import loop: fetch the input chunk by chunk and parse complete lines.
 *
 * $finished, $error, $timeout_passed and $offset are not set anywhere in
 * this file; presumably they are maintained by the surrounding import
 * framework (PMA_importGetNextChunk() and friends) -- TODO confirm.
 *
 * Whenever a field or line terminator cannot be decided with the data at
 * hand, the parser rewinds to the start of the field ($fallbacki) and leaves
 * the inner loop so that more data can be appended first.
 */
while (!($finished && $i >= $len) && !$error && !$timeout_passed) {
    $data = PMA_importGetNextChunk();
    if ($data === FALSE) {
        // subtract data we didn't handle yet and stop processing
        $offset -= strlen($buffer);
        break;
    } elseif ($data === TRUE) {
        // Handle rest of buffer
    } else {
        // Append new data to buffer
        $buffer .= $data;
        unset($data);
        // Do not parse string when we're not at the end and don't have new line inside
        if (($csv_new_line == 'auto' && strpos($buffer, "\r") === FALSE && strpos($buffer, "\n") === FALSE)
            || ($csv_new_line != 'auto' && strpos($buffer, $csv_new_line) === FALSE)) {
            continue;
        }
    }

    // Current length of our buffer
    $len = strlen($buffer);
    // Currently parsed char. The buffer can be empty here (e.g. the last
    // chunk was consumed completely), so do not read past its end.
    $ch = ($i < $len) ? $buffer[$i] : '';
    while ($i < $len) {
        // Deadlock protection: neither the position nor the amount of data
        // changed since the previous iteration, so no progress is possible.
        if ($lasti == $i && $lastlen == $len) {
            $message = PMA_Message::error('strInvalidCSVFormat');
            $message->addParam($line);
            $error = TRUE;
            break;
        }
        $lasti = $i;
        $lastlen = $len;

        // This can happen with auto EOL and \r at the end of buffer
        if (!$csv_finish) {
            // Grab empty field
            if ($ch == $csv_terminated) {
                if ($i == $len - 1) {
                    break;
                }
                $values[] = '';
                $i++;
                $ch = $buffer[$i];
                continue;
            }

            // Grab one field
            $fallbacki = $i;
            if ($ch == $csv_enclosed) {
                if ($i == $len - 1) {
                    break;
                }
                $need_end = TRUE;
                $i++;
                $ch = $buffer[$i];
            } else {
                $need_end = FALSE;
            }
            $fail = FALSE;
            $value = '';
            // Enclosed field: read up to the closing enclosure.
            // Bare field: read up to a field or line terminator.
            while (($need_end && $ch != $csv_enclosed)
                || (!$need_end && !($ch == $csv_terminated
                || $ch == $csv_new_line || ($csv_new_line == 'auto'
                && ($ch == "\r" || $ch == "\n"))))) {
                if ($ch == $csv_escaped) {
                    // The escaped character is not in the buffer yet
                    if ($i == $len - 1) {
                        $fail = TRUE;
                        break;
                    }
                    $i++;
                    $ch = $buffer[$i];
                }
                $value .= $ch;
                if ($i == $len - 1) {
                    // Out of data: only acceptable at the very end of input
                    if (!$finished) {
                        $fail = TRUE;
                    }
                    break;
                }
                $i++;
                $ch = $buffer[$i];
            }

            // unquoted NULL string
            if (false === $need_end && $value === 'NULL') {
                $value = null;
            }

            if ($fail) {
                $i = $fallbacki;
                $ch = $buffer[$i];
                break;
            }
            // Need to strip trailing enclosing char?
            if ($need_end && $ch == $csv_enclosed) {
                if ($finished && $i == $len - 1) {
                    $ch = NULL;
                } elseif ($i == $len - 1) {
                    $i = $fallbacki;
                    $ch = $buffer[$i];
                    break;
                } else {
                    $i++;
                    $ch = $buffer[$i];
                }
            }
            // Are we at the end?
            if ($ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n")) || ($finished && $i == $len - 1)) {
                $csv_finish = TRUE;
            }
            // Go to next char
            if ($ch == $csv_terminated) {
                if ($i == $len - 1) {
                    $i = $fallbacki;
                    $ch = $buffer[$i];
                    break;
                }
                $i++;
                $ch = $buffer[$i];
            }
            // If everything went okay, store value
            $values[] = $value;
        }

        // End of line
        if ($csv_finish || $ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))) {
            if ($csv_new_line == 'auto' && $ch == "\r") { // Handle "\r\n"
                if ($i >= ($len - 2) && !$finished) {
                    break; // We need more data to decide new line
                }
                // At the very end of the input there may be nothing after
                // the "\r"; check the bounds before peeking at the next byte.
                if ($i + 1 < $len && $buffer[$i + 1] == "\n") {
                    $i++;
                }
            }
            // We didn't parse value till the end of line, so there was empty one
            if (!$csv_finish) {
                $values[] = '';
            }

            if ($analyze) {
                // Collect the row and remember the widest line seen so far
                foreach ($values as $val) {
                    $tempRow[] = $val;
                    ++$col_count;
                }

                if ($col_count > $max_cols) {
                    $max_cols = $col_count;
                }
                $col_count = 0;

                $rows[] = $tempRow;
                $tempRow = array();
            } else {
                // Do we have correct count of values?
                if (count($values) != $required_fields) {

                    // Hack for excel
                    if ($values[count($values) - 1] == ';') {
                        unset($values[count($values) - 1]);
                    } else {
                        $message = PMA_Message::error('strInvalidCSVFieldCount');
                        $message->addParam($line);
                        $error = TRUE;
                        break;
                    }
                }

                $first = TRUE;
                $sql = $sql_template;
                foreach ($values as $val) {
                    if (!$first) {
                        $sql .= ', ';
                    }
                    if ($val === null) {
                        $sql .= 'NULL';
                    } else {
                        // NOTE(review): addslashes() is not aware of the
                        // connection character set or SQL mode; consider the
                        // project's own SQL escaping helper here -- confirm
                        // which one is available in this branch.
                        $sql .= '\'' . addslashes($val) . '\'';
                    }

                    $first = FALSE;
                }
                $sql .= ')';

                /**
                 * @todo maybe we could add original line to verbose SQL in comment
                 */
                PMA_importRunQuery($sql, $sql);
            }

            // Drop the consumed line and reset the per-line state
            $line++;
            $csv_finish = FALSE;
            $values = array();
            // substr() may return FALSE instead of '' on older PHP versions
            // when nothing is left; keep $buffer a string in every case.
            $buffer = (string) substr($buffer, $i + 1);
            $len = strlen($buffer);
            $i = 0;
            $lasti = -1;
            // Nothing to read when the line just consumed was the last one
            $ch = ($len > 0) ? $buffer[0] : '';
        }
    } // End of parser loop
} // End of import loop
/*
 * Analysis mode: turn the collected rows into one table description, let
 * PMA_analyzeTable() pick column types and have PMA_buildSQL() create and
 * run the statements.
 */
if ($analyze) {
    /* Fill out all rows */
    $num_rows = count($rows);
    for ($i = 0; $i < $num_rows; ++$i) {
        for ($j = count($rows[$i]); $j < $max_cols; ++$j) {
            $rows[$i][] = 'NULL';
        }
    }

    // The request key may be missing altogether (an unticked checkbox is
    // not submitted), so test it with empty() instead of reading it blindly.
    if (!empty($_REQUEST['csv_col_names'])) {
        // The first row holds the column names; take it out of the data.
        $col_names = array_splice($rows, 0, 1);
        // With no rows at all there is no header either; $col_names then
        // becomes null and is handled by the !isset() branch below.
        $col_names = isset($col_names[0]) ? $col_names[0] : null;
    }

    if ((isset($col_names) && count($col_names) != $max_cols) || !isset($col_names)) {
        // Fill out column names
        for ($i = 0; $i < $max_cols; ++$i) {
            $col_names[] = 'COL '.($i+1);
        }
    }

    if (strlen($db)) {
        // Name the new table after the number of tables already present
        $result = PMA_DBI_fetch_result('SHOW TABLES');
        $tbl_name = 'TABLE '.(count($result) + 1);
    } else {
        $tbl_name = 'TBL_NAME';
    }

    $tables[] = array($tbl_name, $col_names, $rows);

    /* Obtain the best-fit MySQL types for each column */
    $analyses = array();
    $analyses[] = PMA_analyzeTable($tables[0]);

    /**
     * string $db_name (no backquotes)
     *
     * array $table = array(table_name, array() column_names, array()() rows)
     * array $tables = array of "$table"s
     *
     * array $analysis = array(array() column_types, array() column_sizes)
     * array $analyses = array of "$analysis"s
     *
     * array $create = array of SQL strings
     *
     * array $options = an associative array of options
     */

    /* Set database name to the currently selected one, if applicable */
    if (strlen($db)) {
        $db_name = $db;
        $options = array('create_db' => false);
    } else {
        $db_name = 'CSV_DB';
        $options = NULL;
    }

    /* Non-applicable parameters */
    $create = NULL;

    /* Create and execute necessary SQL statements from data */
    PMA_buildSQL($db_name, $tables, $analyses, $create, $options);

    unset($tables);
    unset($analyses);
}
// Flush whatever the import layer still holds in its query buffer
PMA_importRunQuery();

// Fields left over in $values mean the input ended in the middle of a line;
// report that unless an earlier error has already been recorded.
if (!$error && count($values) > 0) {
    $message = PMA_Message::error('strInvalidCSVFormat');
    $message->addParam($line);
    $error = TRUE;
}