6 from collections
import namedtuple
8 from colly
.exceptions
import CsvImportError
10 import simplejson
as json
12 class Collate(object):
16 cols
= 0 # number of columns in CSV file
18 ''' Csv parsing and formatting.
20 def __init__(self
, csv_file
, **options
):
21 with
open(csv_file
, "rb") as f
:
24 ''' Determine given columns, check against CSV.
26 first_row
= raw
.next() # discards first row
28 if 'headings' in options
:
29 headings
= options
['headings']
30 f
.seek(0) # reset iter to first row ^
33 logging
.info('Interpretting headings as row #1')
35 self
.headings
= self
.pad(first_row
, headings
)
37 ''' OK, columns should be in order, ready to generate a map of the
40 index
, pk
= {}, 0 #: set empty vars
43 Head
= namedtuple("row", ",".join(self
.headings
), verbose
=False)
44 except ValueError, err
:
45 raise CsvImportError(err
)
47 for row
in map(Head
._make
, raw
):
48 ''' Perhaps not immediately obvious, index contains the full
49 dataset. The "pk" (primary key) should be unique, or if
50 not given it will be incremented.
52 if hasattr(row
, 'pk'):
57 self
.column
= index
#: saves all to _rows, and the pk column as a set.
59 ''' Make properties use validation & write-once only
62 def column(self
): return False
70 def column(self
): return set(self
._rows
) #: NB this will rtn the pk column
74 def pad(self
, sample
, headings
):
75 ''' Padding rows (e.g. csv headings), tries to avoid annoying namedtuple
81 headings
= headings
or []
82 if len(sample
) < len(headings
):
83 raise CsvImportError("Given headings exceeded those in CSV")
85 for n
, col
in enumerate(sample
):
86 auto_heading
= alphabet(n
)
88 if re
.match(r
'^\s|,|$', headings
[n
]):
89 headings
[n
] = auto_heading
91 headings
.append(auto_heading
)
93 logging
.info("CSV columns given headings: %s" % (headings
))
96 def get_row(self
, pk
):
99 ''' Rehashing/ formatting
103 for i
in self
.column
:
104 m
[i
] = self
._rows
[i
]._asdict
() #: turn _rows (namedtuple) into dict => dump.
110 ''' Returns the letter of the alphabet at index 'n'; past 'Z' it wraps around with a numeric suffix (A2, B2, C2, ...), e.g.:
111 >>> assert (alphabet(0), alphabet(25)) == ('A', 'Z')
112 >>> assert alphabet(26) == 'A2'
113 >>> assert alphabet(42) == 'Q2'
114 >>> assert alphabet(52) == 'A3'
117 return string
.uppercase
[n
]
120 step
= int(round((diff
% 1) * 26))
123 string
.uppercase
[step
],
124 str(int(diff
+1)) #: number of iterations of alphabet, start at '2'
127 if __name__
== "__main__":