From a1d321d65e59cc62ad883a33923a99876bfb0b04 Mon Sep 17 00:00:00 2001
From: Davis Silverman <davis@thedav.is>
Date: Sat, 22 Apr 2023 16:36:12 +0000
Subject: [PATCH] Add experimental new OFX importer (#921)

Hi there,

I try to tackle #798 here. It was suggested to throw this behind a
feature flag, so here it is!

this does its best to import the problem file in #767.

I am working on this because it would make my work on #918 easier :)

Feel free to set the feature flag to true and try the new importer. The
date parser is not as sophisticated as the one in `node-libofx`, but I
tried 3 different OFX files, one from my bank, one from the mocks, and
one from #767. They all seem to work well enough on that front, but this
is definitely the weak point of the new implementation.

Let me know what you think!
---
 .../components/modals/ImportTransactions.js   |  61 +++++++-
 packages/loot-core/package.json               |   1 +
 .../src/server/accounts/parse-file.js         | 141 ++++++++++++++++++
 upcoming-release-notes/921.md                 |   6 +
 yarn.lock                                     |  21 +++
 5 files changed, 228 insertions(+), 2 deletions(-)
 create mode 100644 upcoming-release-notes/921.md

diff --git a/packages/desktop-client/src/components/modals/ImportTransactions.js b/packages/desktop-client/src/components/modals/ImportTransactions.js
index 2b40a8471..897a81037 100644
--- a/packages/desktop-client/src/components/modals/ImportTransactions.js
+++ b/packages/desktop-client/src/components/modals/ImportTransactions.js
@@ -555,6 +555,7 @@ export function ImportTransactions({
   importTransactions,
   getPayees,
   savePrefs,
+  addNotification,
 }) {
   let [multiplierAmount, setMultiplierAmount] = useState('');
   let [loadingState, setLoadingState] = useState('parsing');
@@ -586,11 +587,67 @@ export function ImportTransactions({
     setFilename(filename);
     setFileType(filetype);
 
-    let { errors, transactions } = await parseTransactions(filename, options);
-
+    let results = await parseTransactions(filename, options);
+    let errors, transactions;
     setLoadingState(null);
     setError(null);
 
+    /// Do fine grained reporting between the old and new OFX importers.
+    if (results.ofxParser) {
+      if (results.which === 'both') {
+        // There were errors in the both parsers.
+        // Either a bad file, or a bug that must be fixed in the new!
+        // Show the new errors here.
+        console.log('Old errors: ', results.oldErrors);
+        console.log('New errors: ', results.newErrors);
+        errors = results.newErrors;
+        transactions = {};
+      } else if (results.which === 'old') {
+        // There were errors in the old, but not the new
+        // So the transactions here are from the new parser.
+        addNotification({
+          type: 'warning',
+          sticky: 'false',
+          message:
+            'Import was successful, but please [file a bug report](https://github.com/actualbudget/actual/issues/new?assignees=&labels=bug%2Cneeds+triage&template=bug-report.yml&title=%5BBug%5D%3A+New+OFX+Importer:+bad+old+parse:) and attach a redacted version of the file so we can fix our new algorithm.',
+        });
+        errors = [];
+        transactions = results.transactions;
+      } else if (results.which === 'new') {
+        // There were errors in the new, but not the old.
+        // So the transactions here are from the old parser.
+        // THIS IS A BUG AND SHOULD BE FIXED IN THE NEW PARSER!
+        addNotification({
+          type: 'warning',
+          sticky: 'false',
+          message:
+            'Import was successful, but please [file a bug report](https://github.com/actualbudget/actual/issues/new?assignees=&labels=bug%2Cneeds+triage&template=bug-report.yml&title=%5BBug%5D%3A+New+OFX+Importer:+bad+new+parse:) and attach a redacted version of the file so we can fix our new algorithm.',
+        });
+        errors = [];
+        transactions = results.transactions;
+      } else if (results.which === 'none') {
+        // Results were the same between the two!
+        errors = [];
+        transactions = results.transactions;
+      } else if (results.which === 'diff') {
+        // There was a difference in results between the two.
+        // use the old importer to be safe.
+        console.log('Different parse results');
+        console.log('Old OFX importer: ', results.oldTrans);
+        console.log('New OFX importer: ', results.newTrans);
+        addNotification({
+          type: 'warning',
+          sticky: 'false',
+          message:
+            'possible error importing file, please [file a bug report](https://github.com/actualbudget/actual/issues/new?assignees=&labels=bug%2Cneeds+triage&template=bug-report.yml&title=%5BBug%5D%3A+New+OFX+Importer:) and attach a redacted version of the file.',
+        });
+        transactions = results.oldTrans;
+        errors = [];
+      }
+    } else {
+      transactions = results.transactions;
+      errors = results.errors;
+    }
     if (errors.length > 0) {
       setError({
         parsed: true,
diff --git a/packages/loot-core/package.json b/packages/loot-core/package.json
index c580ecc04..36e6d6543 100644
--- a/packages/loot-core/package.json
+++ b/packages/loot-core/package.json
@@ -23,6 +23,7 @@
     "@rschedule/ical-tools": "^1.2.0",
     "@rschedule/json-tools": "^1.2.0",
     "@rschedule/standard-date-adapter": "^1.2.0",
+    "@wademason/ofx": "1.0.1",
     "absurd-sql": "0.0.53",
     "better-sqlite3": "^8.2.0",
     "core-js": "^3.8.3",
diff --git a/packages/loot-core/src/server/accounts/parse-file.js b/packages/loot-core/src/server/accounts/parse-file.js
index 750cecede..9f2b8211e 100644
--- a/packages/loot-core/src/server/accounts/parse-file.js
+++ b/packages/loot-core/src/server/accounts/parse-file.js
@@ -1,3 +1,4 @@
+import ofx from '@wademason/ofx';
 import csv2json from 'csv-parse/lib/sync';
 
 import * as fs from '../../platform/server/fs';
@@ -85,7 +86,147 @@ async function parseQIF(filepath) {
   };
 }
 
+// Extracts the YYYY-MM-DD from the following formats:
+/// * YYYYMMDDHHMMSS.XXX[gmt offset:tz name] (official OFX format)
+/// * 20190123120000
+/// If an OFX/QFX file is seen with a different format, it should
+/// be supported by this function, even if it is not spec-compliant.
+function ofxDateParse(raw) {
+  const longRx =
+    /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\.\d{3})?\[(.+):(.+)\]$/;
+  const shortRx = /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})$/;
+  let res = longRx.exec(raw);
+  if (res) {
+    const [_, y, m, day, _hr, _min, _sec, _milli, _offsetStr, _tz] = res;
+    return `${y}-${m}-${day}`;
+  } else {
+    res = shortRx.exec(raw);
+    if (res) {
+      const [_, y, m, day, _hr, _min, _sec] = res;
+      return `${y}-${m}-${day}`;
+    }
+  }
+  console.warn('Not able to parse date', raw);
+  return null;
+}
+
 async function parseOFX(filepath) {
+  let objectEq = (arg1, arg2) => {
+    if (
+      Object.prototype.toString.call(arg1) ===
+      Object.prototype.toString.call(arg2)
+    ) {
+      if (
+        Object.prototype.toString.call(arg1) === '[object Object]' ||
+        Object.prototype.toString.call(arg1) === '[object Array]'
+      ) {
+        if (Object.keys(arg1).length !== Object.keys(arg2).length) {
+          return false;
+        }
+        return Object.keys(arg1).every(function (key) {
+          return objectEq(arg1[key], arg2[key]);
+        });
+      } else if (typeof arg1 === 'number' && typeof arg2 === 'number') {
+        return Math.abs(arg1 - arg2) < Number.EPSILON;
+      }
+      return arg1 === arg2;
+    } else if (
+      (arg1 === undefined || arg1 === null) &&
+      (arg2 === undefined || arg2 === null)
+    ) {
+      return true;
+    }
+    return false;
+  };
+  const { transactions: oldTrans, errors: oldErrors } =
+    await parseOfxNodeLibofx(filepath);
+  const { transactions: newTrans, errors: newErrors } =
+    await parseOfxJavascript(filepath);
+  // send fine-grained information about how the parsing went, comparatively.
+  if (oldErrors.length > 0 && newErrors.length > 0) {
+    return {
+      ofxParser: true,
+      which: 'both',
+      errors: {
+        oldErrors,
+        newErrors,
+      },
+    };
+  } else if (oldErrors.length > 0) {
+    return {
+      ofxParser: true,
+      which: 'old',
+      errors: {
+        oldErrors,
+      },
+      transactions: newTrans,
+    };
+  } else if (newErrors.length > 0) {
+    return {
+      ofxParser: true,
+      which: 'new',
+      errors: {
+        newErrors,
+      },
+      transactions: oldTrans,
+    };
+  } else {
+    if (objectEq(oldTrans, newTrans)) {
+      return {
+        ofxParser: true,
+        which: 'none',
+        errors: [],
+        transactions: oldTrans,
+      };
+    } else {
+      return {
+        ofxParser: true,
+        which: 'diff',
+        errors: [],
+        oldTrans: oldTrans,
+        newTrans: newTrans,
+      };
+    }
+  }
+}
+
+async function parseOfxJavascript(filepath) {
+  let errors = [];
+  // 'binary' should be equal to latin1 in node.js
+  // not sure about browser. We want latin1 and not utf8.
+  // For some reason, utf8 does not parse ofx files correctly here.
+  const contents = new TextDecoder('latin1').decode(
+    await fs.readFile(filepath, 'binary'),
+  );
+
+  let data;
+  try {
+    data = ofx.parse(contents);
+  } catch (err) {
+    errors.push({
+      message: 'Failed importing file',
+      internal: err.stack,
+    });
+    return { errors };
+  }
+  // .STMTTRN may be a list or a single object.
+  const transactions = [
+    data.body.OFX.BANKMSGSRSV1.STMTTRNRS.STMTRS.BANKTRANLIST.STMTTRN,
+  ].flat();
+  return {
+    errors,
+    transactions: transactions.map(trans => ({
+      amount: parseFloat(trans.TRNAMT._text),
+      imported_id: trans.FITID._text,
+      date: trans.DTPOSTED ? ofxDateParse(trans.DTPOSTED._text) : null,
+      payee_name: trans.NAME?._text,
+      imported_payee: trans.NAME?._text,
+      notes: trans.MEMO?._text,
+    })),
+  };
+}
+
+async function parseOfxNodeLibofx(filepath) {
   let { getOFXTransactions, initModule } = await import(
     /* webpackChunkName: 'xfo' */ 'node-libofx'
   );
diff --git a/upcoming-release-notes/921.md b/upcoming-release-notes/921.md
new file mode 100644
index 000000000..7e1d900eb
--- /dev/null
+++ b/upcoming-release-notes/921.md
@@ -0,0 +1,6 @@
+---
+category: Features
+authors: [sinistersnare]
+---
+
+Add experimental OFX importer written in pure javascript.
diff --git a/yarn.lock b/yarn.lock
index 054995e0d..f15ab08ae 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -4302,6 +4302,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@wademason/ofx@npm:1.0.1":
+  version: 1.0.1
+  resolution: "@wademason/ofx@npm:1.0.1"
+  dependencies:
+    xml-js: ^1.6.11
+  checksum: 704f33ec70bb44f1612723a1ac216a938dcd47399414d5c39a7d95c365ffedbe033eb5812f732ab9e6f5dc6fcce318e6145a5ca058d4dad2d34b621bd786001e
+  languageName: node
+  linkType: hard
+
 "@webassemblyjs/ast@npm:1.11.1":
   version: 1.11.1
   resolution: "@webassemblyjs/ast@npm:1.11.1"
@@ -13429,6 +13438,7 @@ __metadata:
     "@types/jlongster__sql.js": "npm:@types/sql.js@latest"
     "@types/node-ipc": ^9.2.0
     "@types/webpack": ^5.28.0
+    "@wademason/ofx": 1.0.1
     absurd-sql: 0.0.53
     adm-zip: ^0.5.9
     babel-loader: ^8.0.6
@@ -21465,6 +21475,17 @@ __metadata:
   languageName: node
   linkType: hard
 
+"xml-js@npm:^1.6.11":
+  version: 1.6.11
+  resolution: "xml-js@npm:1.6.11"
+  dependencies:
+    sax: ^1.2.4
+  bin:
+    xml-js: ./bin/cli.js
+  checksum: 24a55479919413687105fc2d8ab05e613ebedb1c1bc12258a108e07cff5ef793779297db854800a4edf0281303ebd1f177bc4a588442f5344e62b3dddda26c2b
+  languageName: node
+  linkType: hard
+
 "xml-name-validator@npm:^3.0.0":
   version: 3.0.0
   resolution: "xml-name-validator@npm:3.0.0"
-- 
GitLab