-
Notifications
You must be signed in to change notification settings - Fork 9
/
process-ameritrade-2018.py
executable file
·111 lines (104 loc) · 3.95 KB
/
process-ameritrade-2018.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/python3
# In Ubuntu or Fedora, pdftotext is part of the poppler-utils package.
# Take the PDF file containing Form 1099-B and run these commands:
# pdftotext -enc UTF-8 -layout 1099-b.pdf 1099-b.txt
# python3 process-ameritrade-2018.py 1099-b.txt > 1099-b.csv
# python3 create-txf-2015.py 1099-b.csv > 1099-b.txf
# Make sure to enter a missing cost in TurboTax in Forms view.
import re
import sys
# Load the pdftotext output named on the command line into `content`, one
# entry per line with the line terminator removed.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of an IndexError traceback.
    sys.exit('usage: %s 1099-b.txt > 1099-b.csv' % sys.argv[0])
# The header of this script tells the user to run pdftotext with
# "-enc UTF-8", so decode as UTF-8 explicitly rather than relying on the
# platform's locale encoding.
with open(sys.argv[1], encoding='utf-8') as f:
    content = [x.strip('\n') for x in f]
# Covered and noncovered tax lots are laid out slightly differently in the
# report, but because splitLine looks for a nearby space before cutting a
# field, one set of nominal column offsets serves both layouts.
covered_columns = noncovered_columns = (0, 9, 40, 69, 85, 105, 132, 156)
# Offsets currently in effect while scanning; starts with the covered layout.
columns = covered_columns
def splitLine(line, columns, slack=7):
    """Split a fixed-width text line into stripped, comma-free fields.

    Args:
        line: The text line to cut up.
        columns: Nominal start offset of every field, in ascending order.
            The sequence itself is never modified.
        slack: Width of the window, starting at each nominal offset, that is
            searched for a space. Defaults to 7.

    Returns:
        A list with one string per entry in ``columns``. Each string has
        surrounding whitespace stripped and every ',' removed.
    """
    starts = list(columns)
    # Every boundary except the first is moved to the first space found in
    # [offset, offset + slack), so a value that overruns its nominal column
    # is not cut in half. If no space is found the nominal offset is kept.
    for i in range(1, len(starts)):
        space = line.find(' ', starts[i], starts[i] + slack)
        if space >= 0:
            starts[i] = space
    # Each field runs up to the start of the next one; the last field runs to
    # the end of the line.
    ends = starts[1:] + [len(line)]
    return [line[begin:end].strip().replace(',', '')
            for begin, end in zip(starts, ends)]
# Walk the report with a small state machine and collect one record per lot.
#   state 0   - outside a transaction table; lines are ignored
#   state 1-3 - just past the "1a- Description ..." header; each non-empty
#               line advances the state, so three such lines are skipped
#   state 4   - expecting a "<description> / CUSIP: ... / Symbol: ..." line
#   state 5   - expecting the fixed-width line that follows that description
# Each record is [description, symbol] + splitLine(...) fields + [box].
# NOTE(review): the prefixes below are compared literally against the
# pdftotext -layout output; confirm their spacing against a real report.
box = 'A'  # letter from the most recent "Box X checked" line
pat = re.compile(r'Box ([A-F]) checked')
records = []
state = 0
for line in content:
    match = pat.search(line)
    if match:
        box = match.group(1)
    if ' COVERED TAX LOTS' in line:
        columns = covered_columns
    elif ' NONCOVERED TAX LOTS' in line:
        columns = noncovered_columns
    if line.startswith('1a- Description of property/CUSIP/Symbol'):
        state = 1
        continue
    if line.startswith((' Totals :', '* This is important tax information')):
        # End of the table (or a page footer): stop until the next header.
        state = 0
        continue
    if not state or not line:
        continue
    if state < 4:
        state += 1
        continue
    if state == 4:
        # Lines starting with a space are not description lines; skip them.
        if line.startswith(' '):
            continue
        # str.index raises ValueError if a marker is missing from the line.
        pos1 = line.index(' / CUSIP: ')
        pos2 = line.index(' / Symbol:', pos1)
        symbol = line[pos2 + len(' / Symbol: '):]
        if not symbol:
            # Nothing after "Symbol:", so use the CUSIP text instead.
            symbol = line[pos1 + len(' / CUSIP: '):pos2]
        pending = [line[:pos1], symbol]
        state = 5
        continue
    # state == 5
    records.append(pending + splitLine(line, columns) + [box])
    state = 4
# Emit one CSV line per record, ordered by symbol. The sort uses the symbol
# as parsed, before the rewrite performed inside the loop.
# Record layout: r[0] description, r[1] symbol, r[2]..r[9] the eight
# fixed-width fields returned by splitLine, r[10] the box letter.
records.sort(key=lambda r: r[1])
pat = re.compile(r'( [PC])( [0-9.]+)0$')
for r in records:
    # A symbol ending in " P <number>0" or " C <number>0" is rewritten so
    # the number (minus its final 0) comes before the letter.
    match = pat.search(r[1])
    if match:
        r[1] = r[1][:match.start(1)] + match.group(2) + match.group(1)
    net_proceeds = ''
    if r[4].endswith(' N'):
        # A trailing " N" on this field is reported in a separate column.
        net_proceeds = 'Net proceeds'
        r[4] = r[4][:-2]
    code = r[9]
    count = float(r[3])
    if 'short' in code.lower():
        count = -count
    # These codes do not get used elsewhere. I just set them to make the
    # Ameritrade output more similar to the Schwab output.
    if code == 'Short sale closed- option':
        code = 'BC'
    elif code in ('Option expiration short position', 'Option expiration'):
        code = 'X'
    adj = r[7]
    if adj == '...':
        adj = '--'
    # From the instructions of TD Ameritrade Form 1099-B:
    # Column 1f. Shows W for wash sale, C for collectibles, or D for market discount.
    # Column 1g. Shows the amount of nondeductible loss in a wash sale
    # transaction or the amount of accrued market discount. When the sale of a
    # debt instrument is a wash sale and has accrued market discount, code W
    # will be in column 1f and the amount of the wash sale loss disallowed will
    # be in column 1g. For details on wash sales and market discount, see
    # Schedule D (Form 1040) instructions and Pub. 550.
    # One of Column 1f and 1g is in "adj" but I don't know which one (no example).
    #
    # The count is printed with %.15g rather than %g: plain %g keeps only six
    # significant digits, which rounds a quantity such as 1234.567 and prints
    # 1000000 in exponent form.
    print('%s,%.15g,%s,%s,%s,%s,%s,%s,,,%s,%s,%s,%s' % (
        r[1], count, r[5], r[2], r[4], r[6], adj, r[8],
        r[10], code, r[0], net_proceeds))