2018/10/14

ofxファイルをCSVファイルに変換


概要

みずほ銀行の口座履歴はWEBでみるかofxファイルでしか操作できない。
Linuxにて適当なツールがなかったのでPythonで変換スクリプトを作る。


プログラム

file : ofx2csv.py
#!/usr/bin/python3

import sys
import re
import pandas as pd

args = sys.argv
argc = len(args)

if (argc != 2):
	print("Usage: %s input-file.ofx" % args[0])
	quit()

fname_source = args[1]

f = open(fname_source)
df = pd.DataFrame({'Date'  :[],
		   'DataID':[],
		   'Title' :[],
		   'Output':[],
		   'Input' :[],
		   'Total' :[]})
mode = 1
for line in f:
	#print(line,end="")
	if mode == 1:
		if re.match("^", line):
			#print("STMTTRN")
			mode += 1
		elif re.match("^", line):
			#print("LEDGERBAL")
			mode = 10
	elif mode == 2:
		if re.match("^", line):
			if line[8] == "-":
				flag_pm = -1
			else:
				flag_pm = 1
			val = flag_pm * int(line[9:])
			#print("Get value : %d" % val)
			mode += 1
	elif mode == 3:
		if re.match("^", line):
			#print("Get date : %s/%s/%s" % (line[10:14],line[14:16],line[16:18]))
			date = line[7:11] + "/" + line[11:13] + "/" + line[13:15]
			dateid = line[15:21]
			mode += 1
	elif mode == 4:
		if re.match("^", line):
			#print("Get memo : %s" % line[6:-1],end="")
			memo = line[6:-1]
			mode += 1
	elif mode == 5:
		if re.match("^<\/STMTTRN>", line):
			#print("STMTTRN end")
			if val >= 0:
				val_out = 0
				val_in = val
			else:
				val_out = val * -1
				val_in = 0
			df = df.append(pd.Series([date, dateid, memo, val_out, val_in, 0],index=['Date','DataID','Title','Output','Input','Total']),ignore_index=True)
			mode = 1
	elif mode == 10:
		if re.match("^", line):
			#print("Get BALAMT : %s" % line[6:],end="")
			if line[8] == "+":
				flag_pm = 1
			else:
				flag_pm = -1
			total = flag_pm * int(line[9:])
			#print("Get value : %d" % total)
			mode += 1
	elif mode == 11:
		if re.match("^<\/LEDGERBAL>", line):
			#print("LEDGERBAL end")
			#print("BALAMT : %d" % total)
			mode = 1
	else:
		print("Error")
		quit()
f.close()

df = df.sort_values(['Date','DataID'])
df.reset_index(inplace=True, drop=True)

for index in reversed(range(0,len(df))):
    df.at[index, 'Total'] = total
    vo = df.at[index, 'Output']
    vi = df.at[index, 'Input']
    total += vo - vi

print("\"Date\",\"Title\",\"Output\",\"Input\",\"Tatal\"")
for index, row in df.iterrows():
	print("\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"" % (row['Date'], row['Title'], "{:,}".format(int(row['Output'])), "{:,}".format(int(row['Input'])), "{:,}".format(int(row['Total']))))


実行結果

$ ofx2csv.py 2018092902354802370481264.ofx > 2018092902354802370481264.csv
$ cat 2018092902354802370481264.csv
"Date","Title","Output","Input","Tatal"
"2018/09/28","ローン","176,369","0","2,968,302"
"2018/09/28","ローン完済","2,697,566","0","270,736"