From 8ef2c4013ad4917140e56ba8f2458a27da57dc9b Mon Sep 17 00:00:00 2001
From: Joel Jeremy Marquez <joeljeremy.marquez@gmail.com>
Date: Sun, 3 Sep 2023 10:33:06 -0700
Subject: [PATCH] Experimental OFX parser (#1600)

* Experimental OFX parser

* Release notes

* Enable enableExperimentalOfxParser in tests

* Move experimental ofx parser to ofx2json

* Enable experimental ofx parser by default

* Address PR comments
---
 packages/desktop-client/package.json          |   3 +-
 .../components/modals/ImportTransactions.js   |   8 ++
 .../src/components/settings/Experimental.tsx  |   3 +
 .../src/hooks/useFeatureFlag.ts               |   1 +
 packages/loot-core/src/mocks/files/8859-1.qfx |   2 +-
 .../loot-core/src/server/accounts/ofx2json.ts | 103 ++++++++++++++++++
 .../src/server/accounts/parse-file.test.ts    |   4 +-
 .../src/server/accounts/parse-file.ts         |  77 ++++++++++---
 packages/loot-core/src/types/prefs.d.ts       |   3 +-
 .../webpack/webpack.browser.config.js         |   2 +
 upcoming-release-notes/1600.md                |   6 +
 yarn.lock                                     |  20 +++-
 12 files changed, 209 insertions(+), 23 deletions(-)
 create mode 100644 packages/loot-core/src/server/accounts/ofx2json.ts
 create mode 100644 upcoming-release-notes/1600.md

diff --git a/packages/desktop-client/package.json b/packages/desktop-client/package.json
index 802f51ba5..87ef6d6fc 100644
--- a/packages/desktop-client/package.json
+++ b/packages/desktop-client/package.json
@@ -56,7 +56,8 @@
     "sass": "^1.63.6",
     "uuid": "^9.0.0",
     "victory": "^36.6.8",
-    "webpack-bundle-analyzer": "^4.9.0"
+    "webpack-bundle-analyzer": "^4.9.0",
+    "xml2js": "^0.6.2"
   },
   "scripts": {
     "start": "cross-env PORT=3001 react-app-rewired start",
diff --git a/packages/desktop-client/src/components/modals/ImportTransactions.js b/packages/desktop-client/src/components/modals/ImportTransactions.js
index e301c4503..699ab7db8 100644
--- a/packages/desktop-client/src/components/modals/ImportTransactions.js
+++ b/packages/desktop-client/src/components/modals/ImportTransactions.js
@@ -11,6 +11,7 @@ import {
 } from 'loot-core/src/shared/util';
 
 import { useActions } from '../../hooks/useActions';
+import useFeatureFlag from '../../hooks/useFeatureFlag';
 import { colors, styles } from '../../style';
 import Button, { ButtonWithLoading } from '../common/Button';
 import Input from '../common/Input';
@@ -585,6 +586,8 @@ export default function ImportTransactions({ modalProps, options }) {
 
   let [clearOnImport, setClearOnImport] = useState(true);
 
+  const enableExperimentalOfxParser = useFeatureFlag('experimentalOfxParser');
+
   async function parse(filename, options) {
     setLoadingState('parsing');
 
@@ -592,6 +595,11 @@ export default function ImportTransactions({ modalProps, options }) {
     setFilename(filename);
     setFileType(filetype);
 
+    options = {
+      ...options,
+      enableExperimentalOfxParser,
+    };
+
     let { errors, transactions } = await parseTransactions(filename, options);
     setLoadingState(null);
     setError(null);
diff --git a/packages/desktop-client/src/components/settings/Experimental.tsx b/packages/desktop-client/src/components/settings/Experimental.tsx
index dd32aaef1..37ea3ac5d 100644
--- a/packages/desktop-client/src/components/settings/Experimental.tsx
+++ b/packages/desktop-client/src/components/settings/Experimental.tsx
@@ -100,6 +100,9 @@ export default function ExperimentalFeatures() {
             </FeatureToggle>
 
             <FeatureToggle flag="privacyMode">Privacy mode</FeatureToggle>
+            <FeatureToggle flag="experimentalOfxParser">
+              Experimental OFX parser
+            </FeatureToggle>
 
             <ThemeFeature />
           </View>
diff --git a/packages/desktop-client/src/hooks/useFeatureFlag.ts b/packages/desktop-client/src/hooks/useFeatureFlag.ts
index aa7806309..70324fd6a 100644
--- a/packages/desktop-client/src/hooks/useFeatureFlag.ts
+++ b/packages/desktop-client/src/hooks/useFeatureFlag.ts
@@ -8,6 +8,7 @@ const DEFAULT_FEATURE_FLAG_STATE: Record<FeatureFlag, boolean> = {
   goalTemplatesEnabled: false,
   privacyMode: true,
   themes: false,
+  experimentalOfxParser: true,
 };
 
 export default function useFeatureFlag(name: FeatureFlag): boolean {
diff --git a/packages/loot-core/src/mocks/files/8859-1.qfx b/packages/loot-core/src/mocks/files/8859-1.qfx
index 8e678c9be..dc531c85b 100644
--- a/packages/loot-core/src/mocks/files/8859-1.qfx
+++ b/packages/loot-core/src/mocks/files/8859-1.qfx
@@ -45,7 +45,7 @@ NEWFILEUID:NONE
 <DTPOSTED>20221019120000
 <TRNAMT>-20.00
 <FITID>wSoKuCS77
-<NAME>Paiement facture/Carte prépayée
+<NAME>Paiement facture/Carte prépayée
 <MEMO>PWW
 </STMTTRN>
 </BANKTRANLIST>
diff --git a/packages/loot-core/src/server/accounts/ofx2json.ts b/packages/loot-core/src/server/accounts/ofx2json.ts
new file mode 100644
index 000000000..36199bc57
--- /dev/null
+++ b/packages/loot-core/src/server/accounts/ofx2json.ts
@@ -0,0 +1,138 @@
+import { parseStringPromise } from 'xml2js';
+
+import { dayFromDate } from '../../shared/months';
+
+/** A statement transaction (STMTTRN) reduced to the fields used for import. */
+type OFXTransaction = {
+  amount: string;
+  fitId: string;
+  name: string;
+  date: string;
+  memo: string;
+  type: string;
+};
+
+/** Header preamble and transactions extracted from an OFX/QFX file. */
+type OFXParseResult = {
+  headers: Record<string, unknown>;
+  transactions: OFXTransaction[];
+};
+
+/**
+ * Rewrites SGML-style OFX, where leaf elements may lack a closing tag, into
+ * XML that can be handed to xml2js.
+ */
+function sgml2Xml(sgml) {
+  // Escape only ampersands that do not already start a predefined XML entity
+  // or a character reference. Escaping every `&` would turn `&amp;` into
+  // `&#038;amp;`, which decodes to the literal text `&amp;` instead of `&`.
+  const escaped = sgml.replace(
+    /&(?!(?:amp|lt|gt|quot|apos|#\d+|#[xX][0-9a-fA-F]+);)/g,
+    '&#038;',
+  );
+
+  return escaped
+    .replace(/>\s+</g, '><') // remove whitespace between adjacent tags
+    .replace(/\s+</g, '<') // remove whitespace before a tag
+    .replace(/>\s+/g, '>') // remove whitespace after a tag
+    .replace(/\.(?=[^<>]*>)/g, '') // remove dots in tag names
+    .replace(/<(\w+?)>([^<]+)/g, '<$1>$2</<added>$1>') // add a marked end tag after every value
+    .replace(/<\/<added>(\w+?)>(<\/\1>)?/g, '</$1>'); // turn the marker into an end tag, absorbing a pre-existing one
+}
+
+/**
+ * Parses an XML string with xml2js. With `explicitArray: false` a child
+ * element that occurs once is returned as a value rather than a one-item array.
+ */
+async function parseXml(content) {
+  return await parseStringPromise(content, { explicitArray: false });
+}
+
+/**
+ * Returns the STMTTRN entries of a parsed bank (BANKMSGSRSV1) or credit card
+ * (CREDITCARDMSGSRSV1) statement as an array, empty when there are none.
+ */
+function getStmtTrn(data) {
+  const ofx = data?.['OFX'];
+  const isCc = ofx?.['CREDITCARDMSGSRSV1'] != null;
+  const msg = isCc ? ofx?.['CREDITCARDMSGSRSV1'] : ofx?.['BANKMSGSRSV1'];
+  const stmtTrnRs = msg?.[`${isCc ? 'CC' : ''}STMTTRNRS`];
+  const stmtRs = stmtTrnRs?.[`${isCc ? 'CC' : ''}STMTRS`];
+  const bankTranList = stmtRs?.['BANKTRANLIST'];
+  // xml2js yields a single object for one STMTTRN, an array for several and
+  // nothing when the statement has no transactions.
+  const stmtTrn = bankTranList?.['STMTTRN'];
+  if (stmtTrn == null) {
+    // Wrapping the missing value would hand `undefined` to mapOfxTransaction.
+    return [];
+  }
+  return Array.isArray(stmtTrn) ? stmtTrn : [stmtTrn];
+}
+
+/** Maps a parsed STMTTRN element to an OFXTransaction. */
+function mapOfxTransaction(stmtTrn): OFXTransaction {
+  // YYYYMMDDHHMMSS format. We just need the date.
+  const dtPosted = stmtTrn['DTPOSTED'];
+  const transactionDate = dtPosted
+    ? new Date(
+        Number(dtPosted.substring(0, 4)), // year
+        Number(dtPosted.substring(4, 6)) - 1, // month (zero-based index)
+        Number(dtPosted.substring(6, 8)), // date
+      )
+    : null;
+
+  return {
+    amount: stmtTrn['TRNAMT'],
+    type: stmtTrn['TRNTYPE'],
+    fitId: stmtTrn['FITID'],
+    // NOTE(review): receives null when DTPOSTED is missing; confirm that
+    // dayFromDate handles that.
+    date: dayFromDate(transactionDate),
+    name: stmtTrn['NAME'],
+    memo: stmtTrn['MEMO'],
+  };
+}
+
+/**
+ * Parses the contents of an OFX/QFX file.
+ *
+ * @param ofx Text of the file: a `KEY:VALUE` header preamble followed by the
+ *   `<OFX>` element.
+ * @returns The preamble headers and the statement transactions.
+ * @throws If the `<OFX>` element is missing or the body cannot be parsed.
+ */
+export default async function parse(ofx: string): Promise<OFXParseResult> {
+  // Split into the header preamble and everything after the <OFX> start tag.
+  const contents = ofx.split('<OFX>', 2);
+  if (contents.length < 2) {
+    throw new Error('Invalid OFX file: no <OFX> element found');
+  }
+
+  // Parse the preamble, one KEY:VALUE header per non-empty line.
+  const headerString = contents[0].split(/\r?\n/);
+  const headers = {};
+  headerString.forEach(attrs => {
+    if (attrs) {
+      const headAttr = attrs.split(/:/, 2);
+      headers[headAttr[0]] = headAttr[1];
+    }
+  });
+
+  // Restore the start tag consumed by the split.
+  const content = `<OFX>${contents[1]}`;
+
+  // Parse the XML/SGML portion of the file into an object
+  // Try as XML first, and if that fails do the SGML->XML mangling
+  let dataParsed = null;
+  try {
+    dataParsed = await parseXml(content);
+  } catch (e) {
+    const sanitized = sgml2Xml(content);
+    dataParsed = await parseXml(sanitized);
+  }
+
+  return {
+    headers: headers,
+    transactions: getStmtTrn(dataParsed).map(mapOfxTransaction),
+  };
+}
diff --git a/packages/loot-core/src/server/accounts/parse-file.test.ts b/packages/loot-core/src/server/accounts/parse-file.test.ts
index 0c0c9a16a..7c03544d5 100644
--- a/packages/loot-core/src/server/accounts/parse-file.test.ts
+++ b/packages/loot-core/src/server/accounts/parse-file.test.ts
@@ -35,7 +35,9 @@ async function importFileWithRealTime(
 ) {
   // Emscripten requires a real Date.now!
   global.restoreDateNow();
-  let { errors, transactions } = await parseFile(filepath);
+  let { errors, transactions } = await parseFile(filepath, {
+    enableExperimentalOfxParser: true,
+  });
   global.restoreFakeDateNow();
 
   if (transactions) {
diff --git a/packages/loot-core/src/server/accounts/parse-file.ts b/packages/loot-core/src/server/accounts/parse-file.ts
index 4a3a2d1bc..67c243076 100644
--- a/packages/loot-core/src/server/accounts/parse-file.ts
+++ b/packages/loot-core/src/server/accounts/parse-file.ts
@@ -4,6 +4,7 @@ import * as fs from '../../platform/server/fs';
 import { dayFromDate } from '../../shared/months';
 import { looselyParseAmount } from '../../shared/util';
 
+import ofx2json from './ofx2json';
 import qif2json from './qif2json';
 
 type ParseError = { message: string; internal: string };
@@ -12,13 +13,16 @@ export type ParseFileResult = {
   transactions?: unknown[];
 };
 
+type ParseFileOptions = {
+  hasHeaderRow?: boolean;
+  delimiter?: string;
+  fallbackMissingPayeeToMemo?: boolean;
+  enableExperimentalOfxParser?: boolean;
+};
+
 export async function parseFile(
-  filepath,
-  options?: {
-    delimiter?: string;
-    hasHeaderRow: boolean;
-    fallbackMissingPayeeToMemo?: boolean;
-  },
+  filepath: string,
+  options?: ParseFileOptions,
 ): Promise<ParseFileResult> {
   let errors = Array<ParseError>();
   let m = filepath.match(/\.[^.]*$/);
@@ -43,14 +47,12 @@ export async function parseFile(
     message: 'Invalid file type',
     internal: '',
   });
-  return { errors, transactions: undefined };
+  return { errors, transactions: [] };
 }
 
 async function parseCSV(
-  filepath,
-  options: { delimiter?: string; hasHeaderRow: boolean } = {
-    hasHeaderRow: true,
-  },
+  filepath: string,
+  options?: ParseFileOptions,
 ): Promise<ParseFileResult> {
   let errors = Array<ParseError>();
   let contents = await fs.readFile(filepath);
@@ -58,9 +60,9 @@ async function parseCSV(
   let data;
   try {
     data = csv2json(contents, {
-      columns: options.hasHeaderRow,
+      columns: options?.hasHeaderRow,
       bom: true,
-      delimiter: options.delimiter || ',',
+      delimiter: options?.delimiter || ',',
       // eslint-disable-next-line rulesdir/typography
       quote: '"',
       trim: true,
@@ -78,7 +80,7 @@ async function parseCSV(
   return { errors, transactions: data };
 }
 
-async function parseQIF(filepath): Promise<ParseFileResult> {
+async function parseQIF(filepath: string): Promise<ParseFileResult> {
   let errors = Array<ParseError>();
   let contents = await fs.readFile(filepath);
 
@@ -106,10 +108,60 @@ async function parseQIF(filepath): Promise<ParseFileResult> {
 }
 
+/**
+ * Parses an OFX/QFX file into importable transactions.
+ *
+ * Uses ofx2json when `options.enableExperimentalOfxParser` is set and the
+ * node-libofx based parser otherwise. Errors thrown by ofx2json are returned
+ * in `errors` instead of being rethrown.
+ */
 async function parseOFX(
-  filepath,
-  options: { fallbackMissingPayeeToMemo?: boolean } = {
-    fallbackMissingPayeeToMemo: true,
-  },
+  filepath: string,
+  options?: ParseFileOptions,
+): Promise<ParseFileResult> {
+  if (!options?.enableExperimentalOfxParser) {
+    return parseOFXNodeLibOFX(filepath, options);
+  }
+
+  const errors = Array<ParseError>();
+  const contents = await fs.readFile(filepath);
+
+  let data;
+  try {
+    data = await ofx2json(contents);
+  } catch (err) {
+    errors.push({
+      message: 'Failed importing file',
+      internal: err.stack,
+    });
+    return { errors };
+  }
+
+  // Banks don't always implement the OFX standard properly
+  // If no payee is available try and fallback to memo
+  const useMemoFallback = options.fallbackMissingPayeeToMemo;
+
+  return {
+    errors,
+    transactions: data.transactions.map(trans => {
+      const payee = trans.name || (useMemoFallback ? trans.memo : null);
+      return {
+        amount: trans.amount,
+        imported_id: trans.fitId,
+        date: trans.date,
+        payee_name: payee,
+        imported_payee: payee,
+        // Keep the memo as notes unless it already stands in for the payee.
+        notes: !!trans.name || !useMemoFallback ? trans.memo || null : null,
+      };
+    }),
+  };
+}
+
+// The default restores the one parseOFX had before it started delegating
+// here, because parseOFX forwards `options` even when it is undefined.
+async function parseOFXNodeLibOFX(
+  filepath: string,
+  options: ParseFileOptions = { fallbackMissingPayeeToMemo: true },
 ): Promise<ParseFileResult> {
   let { getOFXTransactions, initModule } = await import(
     /* webpackChunkName: 'xfo' */ 'node-libofx'
diff --git a/packages/loot-core/src/types/prefs.d.ts b/packages/loot-core/src/types/prefs.d.ts
index 18c8f52d1..56ea97a15 100644
--- a/packages/loot-core/src/types/prefs.d.ts
+++ b/packages/loot-core/src/types/prefs.d.ts
@@ -5,7 +5,8 @@ export type FeatureFlag =
   | 'reportBudget'
   | 'goalTemplatesEnabled'
   | 'privacyMode'
-  | 'themes';
+  | 'themes'
+  | 'experimentalOfxParser';
 
 export type LocalPrefs = Partial<
   {
diff --git a/packages/loot-core/webpack/webpack.browser.config.js b/packages/loot-core/webpack/webpack.browser.config.js
index d19dce0c2..b79ea9fe4 100644
--- a/packages/loot-core/webpack/webpack.browser.config.js
+++ b/packages/loot-core/webpack/webpack.browser.config.js
@@ -43,6 +43,8 @@ module.exports = {
       // used by memfs in a check which we can ignore I think
       url: false,
       zlib: require.resolve('browserify-zlib'),
+      // used by xml2js
+      timers: false,
     },
   },
   module: {
diff --git a/upcoming-release-notes/1600.md b/upcoming-release-notes/1600.md
new file mode 100644
index 000000000..6be98c834
--- /dev/null
+++ b/upcoming-release-notes/1600.md
@@ -0,0 +1,6 @@
+---
+category: Maintenance
+authors: [joel-jeremy]
+---
+
+Experimental OFX parser meant to replace node-libofx
diff --git a/yarn.lock b/yarn.lock
index ed981fb4c..1f8c911c7 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -95,6 +95,7 @@ __metadata:
     uuid: ^9.0.0
     victory: ^36.6.8
     webpack-bundle-analyzer: ^4.9.0
+    xml2js: ^0.6.2
   languageName: unknown
   linkType: soft
 
@@ -16738,7 +16739,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"sax@npm:^1.2.4, sax@npm:~1.2.4":
+"sax@npm:>=0.6.0, sax@npm:^1.2.4, sax@npm:~1.2.4":
   version: 1.2.4
   resolution: "sax@npm:1.2.4"
   checksum: d3df7d32b897a2c2f28e941f732c71ba90e27c24f62ee918bd4d9a8cfb3553f2f81e5493c7f0be94a11c1911b643a9108f231dd6f60df3fa9586b5d2e3e9e1fe
@@ -20146,6 +20147,16 @@ __metadata:
   languageName: node
   linkType: hard
 
+"xml2js@npm:^0.6.2":
+  version: 0.6.2
+  resolution: "xml2js@npm:0.6.2"
+  dependencies:
+    sax: ">=0.6.0"
+    xmlbuilder: ~11.0.0
+  checksum: 458a83806193008edff44562c0bdb982801d61ee7867ae58fd35fab781e69e17f40dfeb8fc05391a4648c9c54012066d3955fe5d993ffbe4dc63399023f32ac2
+  languageName: node
+  linkType: hard
+
 "xmlbuilder@npm:>=11.0.1, xmlbuilder@npm:^15.1.1":
   version: 15.1.1
   resolution: "xmlbuilder@npm:15.1.1"
@@ -20153,6 +20164,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"xmlbuilder@npm:~11.0.0":
+  version: 11.0.1
+  resolution: "xmlbuilder@npm:11.0.1"
+  checksum: 7152695e16f1a9976658215abab27e55d08b1b97bca901d58b048d2b6e106b5af31efccbdecf9b07af37c8377d8e7e821b494af10b3a68b0ff4ae60331b415b0
+  languageName: node
+  linkType: hard
+
 "xmlchars@npm:^2.2.0":
   version: 2.2.0
   resolution: "xmlchars@npm:2.2.0"
-- 
GitLab