From a1d321d65e59cc62ad883a33923a99876bfb0b04 Mon Sep 17 00:00:00 2001 From: Davis Silverman <davis@thedav.is> Date: Sat, 22 Apr 2023 16:36:12 +0000 Subject: [PATCH] Add experimental new OFX importer (#921) Hi there, I try to tackle #798 here. It was suggested to throw this behind a feature flag, so here it is! this does its best to import the problem file in #767. I am working on this because it would make my work on #918 easier :) Feel free to set the feature flag to true and try the new importer. The date parser is not as sophisticated as the one in `node-libofx`, but I tried 3 different OFX files, one from my bank, one from the mocks, and one from #767. They all seem to work well enough on that front, but this is definitely the weak point of the new implementation. Let me know what you think! --- .../components/modals/ImportTransactions.js | 61 +++++++- packages/loot-core/package.json | 1 + .../src/server/accounts/parse-file.js | 141 ++++++++++++++++++ upcoming-release-notes/921.md | 6 + yarn.lock | 21 +++ 5 files changed, 228 insertions(+), 2 deletions(-) create mode 100644 upcoming-release-notes/921.md diff --git a/packages/desktop-client/src/components/modals/ImportTransactions.js b/packages/desktop-client/src/components/modals/ImportTransactions.js index 2b40a8471..897a81037 100644 --- a/packages/desktop-client/src/components/modals/ImportTransactions.js +++ b/packages/desktop-client/src/components/modals/ImportTransactions.js @@ -555,6 +555,7 @@ export function ImportTransactions({ importTransactions, getPayees, savePrefs, + addNotification, }) { let [multiplierAmount, setMultiplierAmount] = useState(''); let [loadingState, setLoadingState] = useState('parsing'); @@ -586,11 +587,67 @@ export function ImportTransactions({ setFilename(filename); setFileType(filetype); - let { errors, transactions } = await parseTransactions(filename, options); - + let results = await parseTransactions(filename, options); + let errors, transactions; setLoadingState(null); setError(null); + /// Do fine grained reporting between the old and new OFX importers. + if (results.ofxParser) { + if (results.which === 'both') { + // There were errors in the both parsers. + // Either a bad file, or a bug that must be fixed in the new! + // Show the new errors here. + console.log('Old errors: ', results.oldErrors); + console.log('New errors: ', results.newErrors); + errors = results.newErrors; + transactions = {}; + } else if (results.which === 'old') { + // There were errors in the old, but not the new + // So the transactions here are from the new parser. + addNotification({ + type: 'warning', + sticky: 'false', + message: + 'Import was successful, but please [file a bug report](https://github.com/actualbudget/actual/issues/new?assignees=&labels=bug%2Cneeds+triage&template=bug-report.yml&title=%5BBug%5D%3A+New+OFX+Importer:+bad+old+parse:) and attach a redacted version of the file so we can fix our new algorithm.', + }); + errors = []; + transactions = results.transactions; + } else if (results.which === 'new') { + // There were errors in the new, but not the old. + // So the transactions here are from the old parser. + // THIS IS A BUG AND SHOULD BE FIXED IN THE NEW PARSER! + addNotification({ + type: 'warning', + sticky: 'false', + message: + 'Import was successful, but please [file a bug report](https://github.com/actualbudget/actual/issues/new?assignees=&labels=bug%2Cneeds+triage&template=bug-report.yml&title=%5BBug%5D%3A+New+OFX+Importer:+bad+new+parse:) and attach a redacted version of the file so we can fix our new algorithm.', + }); + errors = []; + transactions = results.transactions; + } else if (results.which === 'none') { + // Results were the same between the two! + errors = []; + transactions = results.transactions; + } else if (results.which === 'diff') { + // There was a difference in results between the two. + // use the old importer to be safe. + console.log('Different parse results'); + console.log('Old OFX importer: ', results.oldTrans); + console.log('New OFX importer: ', results.newTrans); + addNotification({ + type: 'warning', + sticky: 'false', + message: + 'possible error importing file, please [file a bug report](https://github.com/actualbudget/actual/issues/new?assignees=&labels=bug%2Cneeds+triage&template=bug-report.yml&title=%5BBug%5D%3A+New+OFX+Importer:) and attach a redacted version of the file.', + }); + transactions = results.oldTrans; + errors = []; + } + } else { + transactions = results.transactions; + errors = results.errors; + } if (errors.length > 0) { setError({ parsed: true, diff --git a/packages/loot-core/package.json b/packages/loot-core/package.json index c580ecc04..36e6d6543 100644 --- a/packages/loot-core/package.json +++ b/packages/loot-core/package.json @@ -23,6 +23,7 @@ "@rschedule/ical-tools": "^1.2.0", "@rschedule/json-tools": "^1.2.0", "@rschedule/standard-date-adapter": "^1.2.0", + "@wademason/ofx": "1.0.1", "absurd-sql": "0.0.53", "better-sqlite3": "^8.2.0", "core-js": "^3.8.3", diff --git a/packages/loot-core/src/server/accounts/parse-file.js b/packages/loot-core/src/server/accounts/parse-file.js index 750cecede..9f2b8211e 100644 --- a/packages/loot-core/src/server/accounts/parse-file.js +++ b/packages/loot-core/src/server/accounts/parse-file.js @@ -1,3 +1,4 @@ +import ofx from '@wademason/ofx'; import csv2json from 'csv-parse/lib/sync'; import * as fs from '../../platform/server/fs'; @@ -85,7 +86,147 @@ async function parseQIF(filepath) { }; } +// Extracts the YYYY-MM-DD from the following formats: +/// * YYYYMMDDHHMMSS.XXX[gmt offset:tz name] (official OFX format) +/// * 20190123120000 +/// If an OFX/QFX file is seen with a different format, it should +/// be supported by this function, even if it is not spec-compliant. +function ofxDateParse(raw) { + const longRx = + /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\.\d{3})?\[(.+):(.+)\]$/; + const shortRx = /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})$/; + let res = longRx.exec(raw); + if (res) { + const [_, y, m, day, _hr, _min, _sec, _milli, _offsetStr, _tz] = res; + return `${y}-${m}-${day}`; + } else { + res = shortRx.exec(raw); + if (res) { + const [_, y, m, day, _hr, _min, _sec] = res; + return `${y}-${m}-${day}`; + } + } + console.warn('Not able to parse date', raw); + return null; +} + async function parseOFX(filepath) { + let objectEq = (arg1, arg2) => { + if ( + Object.prototype.toString.call(arg1) === + Object.prototype.toString.call(arg2) + ) { + if ( + Object.prototype.toString.call(arg1) === '[object Object]' || + Object.prototype.toString.call(arg1) === '[object Array]' + ) { + if (Object.keys(arg1).length !== Object.keys(arg2).length) { + return false; + } + return Object.keys(arg1).every(function (key) { + return objectEq(arg1[key], arg2[key]); + }); + } else if (typeof arg1 === 'number' && typeof arg2 === 'number') { + return Math.abs(arg1 - arg2) < Number.EPSILON; + } + return arg1 === arg2; + } else if ( + (arg1 === undefined || arg1 === null) && + (arg2 === undefined || arg2 === null) + ) { + return true; + } + return false; + }; + const { transactions: oldTrans, errors: oldErrors } = + await parseOfxNodeLibofx(filepath); + const { transactions: newTrans, errors: newErrors } = + await parseOfxJavascript(filepath); + // send fine-grained information about how the parsing went, comparatively. + if (oldErrors.length > 0 && newErrors.length > 0) { + return { + ofxParser: true, + which: 'both', + errors: { + oldErrors, + newErrors, + }, + }; + } else if (oldErrors.length > 0) { + return { + ofxParser: true, + which: 'old', + errors: { + oldErrors, + }, + transactions: newTrans, + }; + } else if (newErrors.length > 0) { + return { + ofxParser: true, + which: 'new', + errors: { + newErrors, + }, + transactions: oldTrans, + }; + } else { + if (objectEq(oldTrans, newTrans)) { + return { + ofxParser: true, + which: 'none', + errors: [], + transactions: oldTrans, + }; + } else { + return { + ofxParser: true, + which: 'diff', + errors: [], + oldTrans: oldTrans, + newTrans: newTrans, + }; + } + } +} + +async function parseOfxJavascript(filepath) { + let errors = []; + // 'binary' should be equal to latin1 in node.js + // not sure about browser. We want latin1 and not utf8. + // For some reason, utf8 does not parse ofx files correctly here. + const contents = new TextDecoder('latin1').decode( + await fs.readFile(filepath, 'binary'), + ); + + let data; + try { + data = ofx.parse(contents); + } catch (err) { + errors.push({ + message: 'Failed importing file', + internal: err.stack, + }); + return { errors }; + } + // .STMTTRN may be a list or a single object. + const transactions = [ + data.body.OFX.BANKMSGSRSV1.STMTTRNRS.STMTRS.BANKTRANLIST.STMTTRN, + ].flat(); + return { + errors, + transactions: transactions.map(trans => ({ + amount: parseFloat(trans.TRNAMT._text), + imported_id: trans.FITID._text, + date: trans.DTPOSTED ? ofxDateParse(trans.DTPOSTED._text) : null, + payee_name: trans.NAME?._text, + imported_payee: trans.NAME?._text, + notes: trans.MEMO?._text, + })), + }; +} + +async function parseOfxNodeLibofx(filepath) { let { getOFXTransactions, initModule } = await import( /* webpackChunkName: 'xfo' */ 'node-libofx' ); diff --git a/upcoming-release-notes/921.md b/upcoming-release-notes/921.md new file mode 100644 index 000000000..7e1d900eb --- /dev/null +++ b/upcoming-release-notes/921.md @@ -0,0 +1,6 @@ +--- +category: Features +authors: [sinistersnare] +--- + +Add experimental OFX importer written in pure javascript. diff --git a/yarn.lock b/yarn.lock index 054995e0d..f15ab08ae 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4302,6 +4302,15 @@ __metadata: languageName: node linkType: hard +"@wademason/ofx@npm:1.0.1": + version: 1.0.1 + resolution: "@wademason/ofx@npm:1.0.1" + dependencies: + xml-js: ^1.6.11 + checksum: 704f33ec70bb44f1612723a1ac216a938dcd47399414d5c39a7d95c365ffedbe033eb5812f732ab9e6f5dc6fcce318e6145a5ca058d4dad2d34b621bd786001e + languageName: node + linkType: hard + "@webassemblyjs/ast@npm:1.11.1": version: 1.11.1 resolution: "@webassemblyjs/ast@npm:1.11.1" @@ -13429,6 +13438,7 @@ __metadata: "@types/jlongster__sql.js": "npm:@types/sql.js@latest" "@types/node-ipc": ^9.2.0 "@types/webpack": ^5.28.0 + "@wademason/ofx": 1.0.1 absurd-sql: 0.0.53 adm-zip: ^0.5.9 babel-loader: ^8.0.6 @@ -21465,6 +21475,17 @@ __metadata: languageName: node linkType: hard +"xml-js@npm:^1.6.11": + version: 1.6.11 + resolution: "xml-js@npm:1.6.11" + dependencies: + sax: ^1.2.4 + bin: + xml-js: ./bin/cli.js + checksum: 24a55479919413687105fc2d8ab05e613ebedb1c1bc12258a108e07cff5ef793779297db854800a4edf0281303ebd1f177bc4a588442f5344e62b3dddda26c2b + languageName: node + linkType: hard + "xml-name-validator@npm:^3.0.0": version: 3.0.0 resolution: "xml-name-validator@npm:3.0.0" -- GitLab