View Transactions (#36)

akshowhini · web-flow · commit 2a6701c99c6e · 2021-04-19T20:30:36.000-04:00
* v2.2

* `fix_characters()` - Give user the ability to make character error corrections

* Invalid cross-device link Issue#35

* `view_transactions()` - View your transactions processed in the last 24 hours
diff --git a/ExtractTable/__init__.py b/ExtractTable/__init__.py
@@ -3,6 +3,7 @@
 """
 from urllib import parse as urlparse
 import os
+import shutil
 import typing as ty
 from typing import BinaryIO
 import time
@@ -67,6 +68,19 @@ def check_usage(self) -> dict:
         resp = self._make_request('get', HOST.VALIDATOR)
 
         return resp['usage']
+    
+    def view_transactions(self) -> list:
+        """
+        View your transactions in the past 24 hours
+        :return list of transactions; each record with
+            JobStatus: Status of the job
+            Pages: number of pages of the input; can also be considered as number of credits consumed
+            createdon: timestamp when the request was processed
+            requested_filename: Filename received in the request
+            txn_id: Unique identifier of the transaction, also referred as JobId when retrieving the output via get_result()
+        """
+        resp = self._make_request("GET", HOST.TRANSACTIONS)
+        return resp
 
     def get_result(self, job_id: str, wait_time: int = 10, max_wait_time: int = 300) -> dict:
         """
@@ -196,8 +210,7 @@ def save_output(self, output_folder: os.PathLike = "", output_format: str = "csv
                 warnings.warn(f"Your output_folder not exists. Saving the outputs to {output_folder}")
             else:
                 for each_tbl_path in table_outputs_path:
-                    os.replace(each_tbl_path, os.path.join(output_folder, input_fname+os.path.basename(each_tbl_path)))
-
+                    shutil.move(each_tbl_path, os.path.join(output_folder, input_fname+os.path.basename(each_tbl_path)))
         else:
             output_folder = os.path.split(table_outputs_path[0])[0]
 
diff --git a/ExtractTable/__version__.py b/ExtractTable/__version__.py
@@ -1,4 +1,4 @@
-VERSION = (2, 1, 2)
+VERSION = (2, 2, 0)
 PRERELEASE = None  # "alpha", "beta" or "rc"
 REVISION = None
 
diff --git a/ExtractTable/common.py b/ExtractTable/common.py
@@ -57,7 +57,11 @@ def _converter(self, fmt: str, indexing: bool = False, table_obj="TableJson") ->
         elif fmt in ("xlsx", "excel"):
             output_excel_location = os.path.join(tempfile.mkdtemp(), f"_tables_{len(dfs)}.xlsx")
             if len(dfs) >= 10:
-                warnings.warn(f"There are {dfs} tables extracted. Consider to change the output_format to 'csv' instead")
+                warnings.warn(f"There are {len(dfs)} tables extracted. Consider to change the output_format to 'csv' instead")
+            elif not len(dfs):
+                warnings.warn(f"There are {len(dfs)} tables extracted")
+                return []
+
             with pd.ExcelWriter(output_excel_location) as writer:
                 for n, df in enumerate(dfs):
                     df.to_excel(writer, f'table_{n+1}', index=indexing, header=indexing)
@@ -247,3 +251,26 @@ def fix_date_format(self, columns_idx: List[int] = None, delimiter: str = "/"):
             self.dataframes[df_idx] = df
 
         return self.dataframes
+    
+    def fix_characters(self, columns_idx: List[int] = None, replace_ref: dict = {}):
+        """
+        To replace incorrect character detections
+        Eg: $123,45.0I as $123,45.01
+        :param columns_idx: user preferred columns indices.
+                Default loops through all columns to find Date Columns
+        :param replace_ref: the replacement dictionary for reference
+                Eg: {"I": "1"}
+        :return: correted list of dataframes
+        """
+        for df_idx, df in enumerate(self.dataframes):
+            if not columns_idx:
+                columns_idx = df.columns
+            columns_idx = [str(x) for x in columns_idx]
+
+            for col_idx in columns_idx:
+                for find_ch, repl_ch in replace_ref.items():
+                    df[col_idx] = df[col_idx].str.replace(str(find_ch), str(repl_ch))
+
+            self.dataframes[df_idx] = df
+
+        return self.dataframes
diff --git a/ExtractTable/config.py b/ExtractTable/config.py
@@ -9,6 +9,7 @@ class HOST:
     TRIGGER = 'trigger.extracttable.com'
     RESULT = 'getresult.extracttable.com'
     BIGFILE = 'bigfile.extracttable.com'
+    TRANSACTIONS = 'viewtransactions.extracttable.com'
 
 
 class JobStatus:
diff --git a/example-code.ipynb b/example-code.ipynb
@@ -579,7 +579,8 @@
     "| Split Merged Rows    \t| Works well on cell values with no spaces       \t|\n",
     "| Split Merged Columns \t| Works well on cell values with no spaces       \t|\n",
     "| Fix Decimal Format   \t| To fix thousand and decimal separators         \t|\n",
-    "| Fix Date Format      \t| To handle and modify incorrect date separators \t|"
+    "| Fix Date Format      \t| To handle and modify incorrect date separators \t|\n",
+    "| Fix Characters       \t| To fix incorrect character detections          \t|\n"
    ],
    "metadata": {
     "collapsed": false
@@ -606,7 +607,11 @@
   {
    "cell_type": "markdown",
    "source": [
-    "## 6.1 Split Merged Rows"
+    "## 6.1 Split Merged Rows\n",
+    "        \"\"\"\n",
+    "        To split the merged rows into possible multiple rows\n",
+    "        :return: reformatted list of dataframes\n",
+    "        \"\"\"\n"
    ],
    "metadata": {
     "collapsed": false
@@ -629,7 +634,15 @@
   {
    "cell_type": "markdown",
    "source": [
-    "## 6.2 Split Merged Columns\n"
+    "## 6.2 Split Merged Columns\n",
+    "\n",
+    "        \"\"\"\n",
+    "        To split the merged columns into possible multiple columns\n",
+    "        :param columns_idx: user preferred columns indices.\n",
+    "                Default loops through all columns to find numeric or decimal columns\n",
+    "        :param force_split: To force split through the columns\n",
+    "        :return: reformatted list of dataframes\n",
+    "        \"\"\"\n"
    ],
    "metadata": {
     "collapsed": false
@@ -652,7 +665,18 @@
   {
    "cell_type": "markdown",
    "source": [
-    "## 6.3 Fix Decimal Format\n"
+    "## 6.3 Fix Decimal Format\n",
+    "\n",
+    "\n",
+    "        \"\"\"\n",
+    "        To fix decimal and thousands separator values. Often commas are detected as periods\n",
+    "        :param columns_idx: user preferred columns indices.\n",
+    "                Default loops through all columns to find numeric or decimal columns\n",
+    "        :param decimal_separator: preferred decimal separator\n",
+    "        :param thousands_separator: preferred thousands separator\n",
+    "        :param decimal_position: preferred decimal position\n",
+    "        :return: corrected list of dataframes\n",
+    "        \"\"\"\n"
    ],
    "metadata": {
     "collapsed": false
@@ -675,7 +699,16 @@
   {
    "cell_type": "markdown",
    "source": [
-    "## 6.4 Fix Date Format\n"
+    "## 6.4 Fix Date Format\n",
+    "\n",
+    "        \"\"\"\n",
+    "        To fix date formats of the column\n",
+    "        Eg: 12|1212020 as 12/12/2020\n",
+    "        :param columns_idx: user preferred columns indices.\n",
+    "                Default loops through all columns to find Date Columns\n",
+    "        :param delimiter: \"/\" or \"-\" or whatever else you prefer\n",
+    "        :return: corrected list of dataframes\n",
+    "        \"\"\"\n"
    ],
    "metadata": {
     "collapsed": false
@@ -695,6 +728,39 @@
     }
    }
   },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## 6.5 Fix Characters\n",
+    "\n",
+    "        \"\"\"\n",
+    "        To replace incorrect character detections\n",
+    "        Eg: $123,45.0I as $123,45.01\n",
+    "        :param columns_idx: user preferred columns indices.\n",
+    "                Default loops through all columns\n",
+    "        :param replace_ref: the replacement dictionary for reference\n",
+    "                Eg: {\"I\": \"1\"}\n",
+    "        :return: corrected list of dataframes\n",
+    "        \"\"\"\n"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "corrected_table_dataframes = corrections.fix_characters(columns_idx=[0, 1], replace_ref={\"I\": \"1\", \"S\": \"$\"})"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
   {
    "cell_type": "markdown",
    "source": [
@@ -776,7 +842,49 @@
   {
    "cell_type": "markdown",
    "source": [
-    "# 8. Support & Contact\n",
+    "# 8. View transactions\n",
+    "\n",
+    "To view all the transactions that were triggered in the last 24 hours"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "et_sess.view_transactions()"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "Return a list of transactions, with each record having below properties\n",
+    "\n",
+    "|Property | Description |\n",
+    "|---|---|\n",
+    "|JobStatus | Status of the job|\n",
+    "|Pages | number of pages of the input; can also be considered as number of credits consumed|\n",
+    "|createdon | timestamp when the request was processed|\n",
+    "|requested_filename | Filename received in the request|\n",
+    "|txn_id | Unique identifier of the transaction, also referred to as JobId to retrieve output via `get_result(JobId)`|"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "# 9. Support & Contact\n",
     "\n",
     "Please do not hesitate to approach our developer team at pydevs@extracttable.com for any assitance needed or to report a bug"
    ],

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-VERSION = (2, 1, 2)`
	`1`	`+VERSION = (2, 2, 0)`
`2`	`2`	`PRERELEASE = None # "alpha", "beta" or "rc"`
`3`	`3`	`REVISION = None`
`4`	`4`