#!/usr/bin/python3 import sys import re import pandas as pd args = sys.argv argc = len(args) if (argc != 2): print("Usage: %s input-file.ofx" % args[0]) quit() fname_source = args[1] f = open(fname_source) df = pd.DataFrame({'Date' :[], 'DataID':[], 'Title' :[], 'Output':[], 'Input' :[], 'Total' :[]}) mode = 1 for line in f: #print(line,end="") if mode == 1: if re.match("^", line): #print("STMTTRN") mode += 1 elif re.match("^ ", line): #print("LEDGERBAL") mode = 10 elif mode == 2: if re.match("^ ", line): if line[8] == "-": flag_pm = -1 else: flag_pm = 1 val = flag_pm * int(line[9:]) #print("Get value : %d" % val) mode += 1 elif mode == 3: if re.match("^ ", line): #print("Get date : %s/%s/%s" % (line[10:14],line[14:16],line[16:18])) date = line[7:11] + "/" + line[11:13] + "/" + line[13:15] dateid = line[15:21] mode += 1 elif mode == 4: if re.match("^ ", line): #print("Get memo : %s" % line[6:-1],end="") memo = line[6:-1] mode += 1 elif mode == 5: if re.match("^<\/STMTTRN>", line): #print("STMTTRN end") if val >= 0: val_out = 0 val_in = val else: val_out = val * -1 val_in = 0 df = df.append(pd.Series([date, dateid, memo, val_out, val_in, 0],index=['Date','DataID','Title','Output','Input','Total']),ignore_index=True) mode = 1 elif mode == 10: if re.match("^ ", line): #print("Get BALAMT : %s" % line[6:],end="") if line[8] == "+": flag_pm = 1 else: flag_pm = -1 total = flag_pm * int(line[9:]) #print("Get value : %d" % total) mode += 1 elif mode == 11: if re.match("^<\/LEDGERBAL>", line): #print("LEDGERBAL end") #print("BALAMT : %d" % total) mode = 1 else: print("Error") quit() f.close() df = df.sort_values(['Date','DataID']) df.reset_index(inplace=True, drop=True) for index in reversed(range(0,len(df))): df.at[index, 'Total'] = total vo = df.at[index, 'Output'] vi = df.at[index, 'Input'] total += vo - vi print("\"Date\",\"Title\",\"Output\",\"Input\",\"Tatal\"") for index, row in df.iterrows(): print("\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"" % (row['Date'], row['Title'], "{:,}".format(int(row['Output'])), "{:,}".format(int(row['Input'])), "{:,}".format(int(row['Total']))))
$ ofx2csv.py 2018092902354802370481264.ofx > 2018092902354802370481264.csv $ cat 2018092902354802370481264.csv "Date","Title","Output","Input","Tatal" "2018/09/28","ローン","176,369","0","2,968,302" "2018/09/28","ローン完済","2,697,566","0","270,736"