-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* fixed a lot of issues due to inconsistencies in the code initially
written for Python2, mainly confusion between byte strings and unicode strings. The new code tries to use byte strings for file-like streams and unicode strings for in-memory objects. * added JavaDoc-style strings to thoroughly document functions and methods, mainly in odf/opendocument.py * added a config.dox file to allow one to build documentation in HTML and LaTeX, for library developers and users. * added many assert() clauses to ensure the types of parameters in functions and methods * modified the utilities to make them usable with both Python2 and Python3 * implemented a feature announced in the csv2ods man page but not previously active: the -c / --encoding switch, which takes into account the encoding of the CSV source file. * added rules to build the developer's documentation and install it in usr/share/python-odf/API-doc/html * added a dependency python-odf-doc -> libjs-jquery, necessary because of the HTML code output by Doxygen * -- Georges Khaznadar <[email protected]> Tue, 28 Oct 2014 10:41:32 +0100
- Loading branch information
Showing
40 changed files
with
1,301 additions
and
767 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,147 +16,214 @@ | |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
# | ||
# Contributor(s): | ||
# | ||
# Søren Roug | ||
# | ||
# Oct 2014: Georges Khaznadar <[email protected]> | ||
# - ported to Python3 | ||
# - implemented the missing switch -c / --encoding, with an extra | ||
# feature for POSIX platforms which can guess encoding. | ||
|
||
from odf.opendocument import OpenDocumentSpreadsheet | ||
from odf.style import Style, TextProperties, ParagraphProperties, TableColumnProperties | ||
from odf.text import P | ||
from odf.table import Table, TableColumn, TableRow, TableCell | ||
from optparse import OptionParser | ||
import sys,csv,re, os | ||
import sys,csv,re, os, codecs | ||
|
||
if sys.version_info.major==3: unicode=str | ||
|
||
if sys.version_info.major==2: | ||
class UTF8Recoder: | ||
""" | ||
Iterator that reads an encoded stream and reencodes the input to UTF-8 | ||
""" | ||
def __init__(self, f, encoding): | ||
self.reader = codecs.getreader(encoding)(f) | ||
|
||
def __iter__(self): | ||
return self | ||
|
||
def next(self): | ||
return self.reader.next().encode("utf-8") | ||
|
||
class UnicodeReader: | ||
""" | ||
A CSV reader which will iterate over lines in the CSV file "f", | ||
which is encoded in the given encoding. | ||
""" | ||
|
||
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): | ||
f = UTF8Recoder(f, encoding) | ||
self.reader = csv.reader(f, dialect=dialect, **kwds) | ||
|
||
def next(self): | ||
row = self.reader.next() | ||
return [unicode(s, "utf-8") for s in row] | ||
|
||
def __iter__(self): | ||
return self | ||
|
||
|
||
def csvToOds( pathFileCSV, pathFileODS, tableName='table', | ||
delimiter=',', quoting=csv.QUOTE_MINIMAL, | ||
quotechar = '"', escapechar = None, | ||
skipinitialspace = False, lineterminator = '\r\n'): | ||
textdoc = OpenDocumentSpreadsheet() | ||
# Create a style for the table content. One we can modify | ||
# later in the word processor. | ||
tablecontents = Style(name="Table Contents", family="paragraph") | ||
tablecontents.addElement(ParagraphProperties(numberlines="false", linenumber="0")) | ||
tablecontents.addElement(TextProperties(fontweight="bold")) | ||
textdoc.styles.addElement(tablecontents) | ||
|
||
# Start the table | ||
table = Table( name=tableName ) | ||
output = os.popen('/usr/bin/file ' + pathFileCSV).read() | ||
m=re.match(r'^.*: ([-a-zA-Z0-9]+) text$', output) | ||
if m: | ||
encoding=m.group(1) | ||
if 'ISO-8859' in encoding: | ||
encoding="latin-1" | ||
else: | ||
encoding="utf-8" | ||
|
||
reader = csv.reader(open(pathFileCSV, encoding=encoding), | ||
delimiter=delimiter, | ||
quoting=quoting, | ||
quotechar=quotechar, | ||
escapechar=escapechar, | ||
skipinitialspace=skipinitialspace, | ||
lineterminator=lineterminator) | ||
fltExp = re.compile('^\s*[-+]?\d+(\.\d+)?\s*$') | ||
|
||
for row in reader: | ||
tr = TableRow() | ||
table.addElement(tr) | ||
for val in row: | ||
if fltExp.match(val): | ||
tc = TableCell(valuetype="float", value=val.strip()) | ||
else: | ||
tc = TableCell(valuetype="string") | ||
tr.addElement(tc) | ||
p = P(stylename=tablecontents,text=val) | ||
tc.addElement(p) | ||
|
||
textdoc.spreadsheet.addElement(table) | ||
textdoc.save( pathFileODS ) | ||
|
||
delimiter=',', quoting=csv.QUOTE_MINIMAL, | ||
quotechar = '"', escapechar = None, | ||
skipinitialspace = False, lineterminator = '\r\n', | ||
encoding="utf-8"): | ||
textdoc = OpenDocumentSpreadsheet() | ||
# Create a style for the table content. One we can modify | ||
# later in the word processor. | ||
tablecontents = Style(name="Table Contents", family="paragraph") | ||
tablecontents.addElement(ParagraphProperties(numberlines="false", linenumber="0")) | ||
tablecontents.addElement(TextProperties(fontweight="bold")) | ||
textdoc.styles.addElement(tablecontents) | ||
|
||
# Start the table | ||
table = Table( name=tableName ) | ||
|
||
if sys.version_info.major==3: | ||
reader = csv.reader(open(pathFileCSV, encoding=encoding), | ||
delimiter=delimiter, | ||
quoting=quoting, | ||
quotechar=quotechar, | ||
escapechar=escapechar, | ||
skipinitialspace=skipinitialspace, | ||
lineterminator=lineterminator) | ||
else: | ||
reader = UnicodeReader(open(pathFileCSV), | ||
encoding=encoding, | ||
delimiter=delimiter, | ||
quoting=quoting, | ||
quotechar=quotechar, | ||
escapechar=escapechar, | ||
skipinitialspace=skipinitialspace, | ||
lineterminator=lineterminator) | ||
fltExp = re.compile('^\s*[-+]?\d+(\.\d+)?\s*$') | ||
|
||
for row in reader: | ||
tr = TableRow() | ||
table.addElement(tr) | ||
for val in row: | ||
if fltExp.match(val): | ||
tc = TableCell(valuetype="float", value=val.strip()) | ||
else: | ||
tc = TableCell(valuetype="string") | ||
tr.addElement(tc) | ||
p = P(stylename=tablecontents,text=val) | ||
tc.addElement(p) | ||
|
||
textdoc.spreadsheet.addElement(table) | ||
textdoc.save( pathFileODS ) | ||
|
||
if __name__ == "__main__": | ||
usage = "%prog -i file.csv -o file.ods -d" | ||
parser = OptionParser(usage=usage, version="%prog 0.1") | ||
parser.add_option('-i','--input', action='store', | ||
dest='input', help='File input in csv') | ||
parser.add_option('-o','--output', action='store', | ||
dest='output', help='File output in ods') | ||
parser.add_option('-d','--delimiter', action='store', | ||
dest='delimiter', help='specifies a one-character string to use as the field separator. It defaults to ",".') | ||
|
||
parser.add_option('-c','--encoding', action='store', | ||
dest='encoding', help='specifies the encoding the file csv. It defaults to utf-8') | ||
|
||
parser.add_option('-t','--table', action='store', | ||
dest='tableName', help='The table name in the output file') | ||
|
||
parser.add_option('-s','--skipinitialspace', | ||
dest='skipinitialspace', help='''specifies how to interpret whitespace which | ||
immediately follows a delimiter. It defaults to False, which | ||
means that whitespace immediately following a delimiter is part | ||
of the following field.''') | ||
|
||
parser.add_option('-l','--lineterminator', action='store', | ||
dest='lineterminator', help='''specifies the character sequence which should | ||
terminate rows.''') | ||
|
||
parser.add_option('-q','--quoting', action='store', | ||
dest='quoting', help='''It can take on any of the following module constants: | ||
0 = QUOTE_MINIMAL means only when required, for example, when a field contains either the quotechar or the delimiter | ||
1 = QUOTE_ALL means that quotes are always placed around fields. | ||
2 = QUOTE_NONNUMERIC means that quotes are always placed around fields which do not parse as integers or floating point numbers. | ||
3 = QUOTE_NONE means that quotes are never placed around fields. | ||
It defaults is QUOTE_MINIMAL''') | ||
|
||
parser.add_option('-e','--escapechar', action='store', | ||
dest='escapechar', help='''specifies a one-character string used to escape the delimiter when quoting is set to QUOTE_NONE.''') | ||
|
||
parser.add_option('-r','--quotechar', action='store', | ||
dest='quotechar', help='''specifies a one-character string to use as the quoting character. It defaults to ".''') | ||
|
||
(options, args) = parser.parse_args() | ||
|
||
if options.input: | ||
pathFileCSV = options.input | ||
else: | ||
parser.print_help() | ||
exit( 0 ) | ||
|
||
if options.output: | ||
pathFileODS = options.output | ||
else: | ||
parser.print_help() | ||
exit( 0 ) | ||
|
||
if options.delimiter: | ||
delimiter = options.delimiter | ||
else: | ||
delimiter = "," | ||
|
||
if options.skipinitialspace: | ||
skipinitialspace = True | ||
else: | ||
skipinitialspace=False | ||
|
||
if options.lineterminator: | ||
lineterminator = options.lineterminator | ||
else: | ||
lineterminator ="\r\n" | ||
|
||
if options.escapechar: | ||
escapechar = options.escapechar | ||
else: | ||
escapechar=None | ||
|
||
if options.tableName: | ||
tableName = options.tableName | ||
else: | ||
tableName = "table" | ||
|
||
if options.quotechar: | ||
quotechar = options.quotechar | ||
else: | ||
quotechar = "\"" | ||
|
||
csvToOds( pathFileCSV=pathFileCSV, pathFileODS=pathFileODS, | ||
delimiter=delimiter, skipinitialspace=skipinitialspace, | ||
escapechar=escapechar, lineterminator=lineterminator, | ||
tableName=tableName, quotechar=quotechar) | ||
usage = "%prog -i file.csv -o file.ods -d" | ||
parser = OptionParser(usage=usage, version="%prog 0.1") | ||
parser.add_option('-i','--input', action='store', | ||
dest='input', help='File input in csv') | ||
parser.add_option('-o','--output', action='store', | ||
dest='output', help='File output in ods') | ||
parser.add_option('-d','--delimiter', action='store', | ||
dest='delimiter', help='specifies a one-character string to use as the field separator. It defaults to ",".') | ||
|
||
parser.add_option('-c','--encoding', action='store', | ||
dest='encoding', help='specifies the encoding the file csv. It defaults to utf-8') | ||
|
||
parser.add_option('-t','--table', action='store', | ||
dest='tableName', help='The table name in the output file') | ||
|
||
parser.add_option('-s','--skipinitialspace', | ||
dest='skipinitialspace', help='''specifies how to interpret whitespace which | ||
immediately follows a delimiter. It defaults to False, which | ||
means that whitespace immediately following a delimiter is part | ||
of the following field.''') | ||
|
||
parser.add_option('-l','--lineterminator', action='store', | ||
dest='lineterminator', help='''specifies the character sequence which should | ||
terminate rows.''') | ||
|
||
parser.add_option('-q','--quoting', action='store', | ||
dest='quoting', help='''It can take on any of the following module constants: | ||
0 = QUOTE_MINIMAL means only when required, for example, when a field contains either the quotechar or the delimiter | ||
1 = QUOTE_ALL means that quotes are always placed around fields. | ||
2 = QUOTE_NONNUMERIC means that quotes are always placed around fields which do not parse as integers or floating point numbers. | ||
3 = QUOTE_NONE means that quotes are never placed around fields. | ||
It defaults is QUOTE_MINIMAL''') | ||
|
||
parser.add_option('-e','--escapechar', action='store', | ||
dest='escapechar', help='''specifies a one-character string used to escape the delimiter when quoting is set to QUOTE_NONE.''') | ||
|
||
parser.add_option('-r','--quotechar', action='store', | ||
dest='quotechar', help='''specifies a one-character string to use as the quoting character. It defaults to ".''') | ||
|
||
(options, args) = parser.parse_args() | ||
|
||
if options.input: | ||
pathFileCSV = options.input | ||
else: | ||
parser.print_help() | ||
exit( 0 ) | ||
|
||
if options.output: | ||
pathFileODS = options.output | ||
else: | ||
parser.print_help() | ||
exit( 0 ) | ||
|
||
if options.delimiter: | ||
delimiter = options.delimiter | ||
else: | ||
delimiter = "," | ||
|
||
if options.skipinitialspace: | ||
skipinitialspace = True | ||
else: | ||
skipinitialspace=False | ||
|
||
if options.lineterminator: | ||
lineterminator = options.lineterminator | ||
else: | ||
lineterminator ="\r\n" | ||
|
||
if options.escapechar: | ||
escapechar = options.escapechar | ||
else: | ||
escapechar=None | ||
|
||
if options.tableName: | ||
tableName = options.tableName | ||
else: | ||
tableName = "table" | ||
|
||
if options.quotechar: | ||
quotechar = options.quotechar | ||
else: | ||
quotechar = "\"" | ||
|
||
encoding = "utf-8" # default setting | ||
########################################################### | ||
## try to guess the encoding; this is implemented only for | ||
## POSIX platforms. Can it be improved? | ||
output = os.popen('/usr/bin/file ' + pathFileCSV).read() | ||
m=re.match(r'^.*: ([-a-zA-Z0-9]+) text$', output) | ||
if m: | ||
encoding=m.group(1) | ||
if 'ISO-8859' in encoding: | ||
encoding="latin-1" | ||
else: | ||
encoding="utf-8" | ||
############################################################ | ||
# when the -c or --encoding switch is used, it takes precedence | ||
if options.encoding: | ||
encoding = options.encoding | ||
|
||
csvToOds( pathFileCSV=unicode(pathFileCSV), | ||
pathFileODS=unicode(pathFileODS), | ||
delimiter=delimiter, skipinitialspace=skipinitialspace, | ||
escapechar=escapechar, | ||
lineterminator=unicode(lineterminator), | ||
tableName=tableName, quotechar=quotechar, | ||
encoding=encoding) | ||
|
||
# Local Variables: *** | ||
# mode: python *** | ||
# End: *** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,13 @@ txt: mailodf.txt | |
xmlto txt $< | ||
|
||
clean: | ||
rm -f *.1 *.txt odf | ||
rm -f *.1 *~ *.txt odf | ||
odf: | ||
ln -s ../odf | ||
|
||
test: clean odf | ||
@echo -n "Please input your local email for the test > "; read to; \ | ||
python2 mailodf -f [email protected] -s "F.Y.I" ../tests/examples/ol.odp "$$to"; \ | ||
python3 mailodf -f [email protected] -s "F.Y.I" ../tests/examples/ol.odp "$$to" | ||
@echo 'You should receive two e-mails, one from [email protected]' | ||
@echo 'and the second from [email protected], with subjects: "F.Y.I"' |
Oops, something went wrong.