Skip to content

Commit 2a6701c

Browse files
authored
View Transactions (#36)
* v2.2 * `fix_characters()` - Give user the ability to make character error corrections * Invalid cross-device link Issue#35 * `view_transactions()` - View your transactions processed in the last 24 hours
1 parent c48fe70 commit 2a6701c

File tree

5 files changed

+159
-10
lines changed

5 files changed

+159
-10
lines changed

ExtractTable/__init__.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44
from urllib import parse as urlparse
55
import os
6+
import shutil
67
import typing as ty
78
from typing import BinaryIO
89
import time
@@ -67,6 +68,19 @@ def check_usage(self) -> dict:
6768
resp = self._make_request('get', HOST.VALIDATOR)
6869

6970
return resp['usage']
71+
72+
def view_transactions(self) -> list:
    """
    Fetch the transactions processed in the past 24 hours
    :return list of transactions; each record with
        JobStatus: Status of the job
        Pages: number of pages of the input; can also be considered as number of credits consumed
        createdon: timestamp when the request was processed
        requested_filename: Filename received in the request
        txn_id: Unique identifier of the transaction, also referred as JobId when retrieving the output via get_result()
    """
    # The server response is handed back to the caller untouched.
    return self._make_request("GET", HOST.TRANSACTIONS)
7084

7185
def get_result(self, job_id: str, wait_time: int = 10, max_wait_time: int = 300) -> dict:
7286
"""
@@ -196,8 +210,7 @@ def save_output(self, output_folder: os.PathLike = "", output_format: str = "csv
196210
warnings.warn(f"Your output_folder not exists. Saving the outputs to {output_folder}")
197211
else:
198212
for each_tbl_path in table_outputs_path:
199-
os.replace(each_tbl_path, os.path.join(output_folder, input_fname+os.path.basename(each_tbl_path)))
200-
213+
shutil.move(each_tbl_path, os.path.join(output_folder, input_fname+os.path.basename(each_tbl_path)))
201214
else:
202215
output_folder = os.path.split(table_outputs_path[0])[0]
203216

ExtractTable/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
VERSION = (2, 1, 2)
1+
VERSION = (2, 2, 0)
22
PRERELEASE = None # "alpha", "beta" or "rc"
33
REVISION = None
44

ExtractTable/common.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ def _converter(self, fmt: str, indexing: bool = False, table_obj="TableJson") ->
5757
elif fmt in ("xlsx", "excel"):
5858
output_excel_location = os.path.join(tempfile.mkdtemp(), f"_tables_{len(dfs)}.xlsx")
5959
if len(dfs) >= 10:
60-
warnings.warn(f"There are {dfs} tables extracted. Consider to change the output_format to 'csv' instead")
60+
warnings.warn(f"There are {len(dfs)} tables extracted. Consider to change the output_format to 'csv' instead")
61+
elif not len(dfs):
62+
warnings.warn(f"There are {len(dfs)} tables extracted")
63+
return []
64+
6165
with pd.ExcelWriter(output_excel_location) as writer:
6266
for n, df in enumerate(dfs):
6367
df.to_excel(writer, f'table_{n+1}', index=indexing, header=indexing)
@@ -247,3 +251,26 @@ def fix_date_format(self, columns_idx: List[int] = None, delimiter: str = "/"):
247251
self.dataframes[df_idx] = df
248252

249253
return self.dataframes
254+
255+
def fix_characters(self, columns_idx: List[int] = None, replace_ref: dict = None):
    """
    To replace incorrect character detections
    Eg: $123,45.0I as $123,45.01
    :param columns_idx: user preferred columns indices.
            Default (None or empty) loops through all columns of each dataframe
    :param replace_ref: the replacement dictionary for reference
            Eg: {"I": "1"}
            Keys and values are converted to str and matched literally
    :return: corrected list of dataframes
    """
    # Avoid a shared mutable default; None or an empty mapping means "nothing to replace".
    replacements = dict(replace_ref) if replace_ref else {}

    for df_idx, df in enumerate(self.dataframes):
        # Resolve the target columns per dataframe. Rebinding `columns_idx` here
        # would leak the first table's columns into every following table.
        if columns_idx:
            target_columns = [str(x) for x in columns_idx]
        else:
            target_columns = list(df.columns)

        for col_idx in target_columns:
            for find_ch, repl_ch in replacements.items():
                # regex=False keeps characters such as "$" or "." literal,
                # independent of the installed pandas version's default.
                df[col_idx] = df[col_idx].str.replace(str(find_ch), str(repl_ch), regex=False)

        self.dataframes[df_idx] = df

    return self.dataframes

ExtractTable/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ class HOST:
99
TRIGGER = 'trigger.extracttable.com'
1010
RESULT = 'getresult.extracttable.com'
1111
BIGFILE = 'bigfile.extracttable.com'
12+
TRANSACTIONS = 'viewtransactions.extracttable.com'
1213

1314

1415
class JobStatus:

example-code.ipynb

Lines changed: 114 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,8 @@
579579
"| Split Merged Rows \t| Works well on cell values with no spaces \t|\n",
580580
"| Split Merged Columns \t| Works well on cell values with no spaces \t|\n",
581581
"| Fix Decimal Format \t| To fix thousand and decimal separators \t|\n",
582-
"| Fix Date Format \t| To handle and modify incorrect date separators \t|"
582+
"| Fix Date Format \t| To handle and modify incorrect date separators \t|\n",
583+
"| Fix Characters \t| To fix incorrect character detections \t|\n"
583584
],
584585
"metadata": {
585586
"collapsed": false
@@ -606,7 +607,11 @@
606607
{
607608
"cell_type": "markdown",
608609
"source": [
609-
"## 6.1 Split Merged Rows"
610+
"## 6.1 Split Merged Rows\n",
611+
" \"\"\"\n",
612+
" To split the merged rows into possible multiple rows\n",
613+
" :return: reformatted list of dataframes\n",
614+
" \"\"\"\n"
610615
],
611616
"metadata": {
612617
"collapsed": false
@@ -629,7 +634,15 @@
629634
{
630635
"cell_type": "markdown",
631636
"source": [
632-
"## 6.2 Split Merged Columns\n"
637+
"## 6.2 Split Merged Columns\n",
638+
"\n",
639+
" \"\"\"\n",
640+
" To split the merged columns into possible multiple columns\n",
641+
" :param columns_idx: user preferred columns indices.\n",
642+
" Default loops through all columns to find numeric or decimal columns\n",
643+
" :param force_split: To force split through the columns\n",
644+
" :return: reformatted list of dataframes\n",
645+
" \"\"\"\n"
633646
],
634647
"metadata": {
635648
"collapsed": false
@@ -652,7 +665,18 @@
652665
{
653666
"cell_type": "markdown",
654667
"source": [
655-
"## 6.3 Fix Decimal Format\n"
668+
"## 6.3 Fix Decimal Format\n",
669+
"\n",
670+
"\n",
671+
" \"\"\"\n",
672+
"    To fix decimal and thousands separator values. Often commas are detected as periods\n",
673+
" :param columns_idx: user preferred columns indices.\n",
674+
" Default loops through all columns to find numeric or decimal columns\n",
675+
" :param decimal_separator: preferred decimal separator\n",
676+
" :param thousands_separator: preferred thousands separator\n",
677+
" :param decimal_position: preferred decimal position\n",
678+
" :return: corrected list of dataframes\n",
679+
" \"\"\"\n"
656680
],
657681
"metadata": {
658682
"collapsed": false
@@ -675,7 +699,16 @@
675699
{
676700
"cell_type": "markdown",
677701
"source": [
678-
"## 6.4 Fix Date Format\n"
702+
"## 6.4 Fix Date Format\n",
703+
"\n",
704+
" \"\"\"\n",
705+
" To fix date formats of the column\n",
706+
" Eg: 12|1212020 as 12/12/2020\n",
707+
" :param columns_idx: user preferred columns indices.\n",
708+
" Default loops through all columns to find Date Columns\n",
709+
"    :param delimiter: \"/\" or \"-\" or whatever else you prefer\n",
710+
"    :return: corrected list of dataframes\n",
711+
" \"\"\"\n"
679712
],
680713
"metadata": {
681714
"collapsed": false
@@ -695,6 +728,39 @@
695728
}
696729
}
697730
},
731+
{
732+
"cell_type": "markdown",
733+
"source": [
734+
"## 6.5 Fix Characters\n",
735+
"\n",
736+
" \"\"\"\n",
737+
" To replace incorrect character detections\n",
738+
" Eg: $123,45.0I as $123,45.01\n",
739+
" :param columns_idx: user preferred columns indices.\n",
740+
"        Default loops through all columns\n",
741+
" :param replace_ref: the replacement dictionary for reference\n",
742+
" Eg: {\"I\": \"1\"}\n",
743+
"    :return: corrected list of dataframes\n",
744+
" \"\"\"\n"
745+
],
746+
"metadata": {
747+
"collapsed": false
748+
}
749+
},
750+
{
751+
"cell_type": "code",
752+
"execution_count": null,
753+
"outputs": [],
754+
"source": [
755+
"corrected_table_dataframes = corrections.fix_characters(self, columns_idx=[0, 1], replace_ref={\"I\": \"1\", \"S\": \"$\"})"
756+
],
757+
"metadata": {
758+
"collapsed": false,
759+
"pycharm": {
760+
"name": "#%%\n"
761+
}
762+
}
763+
},
698764
{
699765
"cell_type": "markdown",
700766
"source": [
@@ -776,7 +842,49 @@
776842
{
777843
"cell_type": "markdown",
778844
"source": [
779-
"# 8. Support & Contact\n",
845+
"# 8. View transactions\n",
846+
"\n",
847+
"To view all the transactions that were triggered in the last 24 hours"
848+
],
849+
"metadata": {
850+
"collapsed": false
851+
}
852+
},
853+
{
854+
"cell_type": "code",
855+
"execution_count": null,
856+
"outputs": [],
857+
"source": [
858+
"et_sess.view_transactions()"
859+
],
860+
"metadata": {
861+
"collapsed": false,
862+
"pycharm": {
863+
"name": "#%%\n"
864+
}
865+
}
866+
},
867+
{
868+
"cell_type": "markdown",
869+
"source": [
870+
"Return a list of transactions, with each record having below properties\n",
871+
"\n",
872+
"|Property | Description |\n",
873+
"|---|---|\n",
874+
"|JobStatus | Status of the job|\n",
875+
"|Pages | number of pages of the input; can also be considered as number of credits consumed|\n",
876+
"|createdon | timestamp when the request was processed|\n",
877+
"|requested_filename | Filename received in the request|\n",
878+
"|txn_id | Unique identifier of the transaction, also referred as JobId to retrieve output via `get_result(JobId)`|"
879+
],
880+
"metadata": {
881+
"collapsed": false
882+
}
883+
},
884+
{
885+
"cell_type": "markdown",
886+
"source": [
887+
"# 9. Support & Contact\n",
780888
"\n",
781889
"Please do not hesitate to approach our developer team at [email protected] for any assistance needed or to report a bug"
782890
],

0 commit comments

Comments
 (0)