Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions .idea/docling-ibm-models_slanet_1m.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions docling_ibm_models/slanet_1m/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
## Python

# Virtual environments
.venv
venv

# Byte-compiled bytecode caches
__pycache__/

# Pytest cache
.pytest_cache

# Coverage data file
.coverage

## JetBrains IDEs (IntelliJ IDEA, PyCharm, ...)

.idea

## Visual Studio Code

.vscode

## macOS

.DS_Store


## Project-specific directories (not tracked)

inference/
inference_results/
output/
# `data/` (trailing slash) matches only directories named `data`, at any depth
# below this .gitignore.
data/
# `/data` (leading slash) is anchored next to this .gitignore and is not
# restricted to directories.
# NOTE(review): presumably kept so a `data` symlink or file is ignored too --
# confirm, otherwise it is redundant with `data/` above.
/data
evaluation/
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docling_ibm_models/slanet_1m/Fonts/ARIALN.TTF
Binary file not shown.
Binary file added docling_ibm_models/slanet_1m/Fonts/ARIALNB.TTF
Binary file not shown.
Binary file added docling_ibm_models/slanet_1m/Fonts/ARIALNBI.TTF
Binary file not shown.
Binary file added docling_ibm_models/slanet_1m/Fonts/ARIALNI.TTF
Binary file not shown.
Binary file added docling_ibm_models/slanet_1m/Fonts/arial.ttf
Binary file not shown.
Binary file added docling_ibm_models/slanet_1m/Fonts/arialbd.ttf
Binary file not shown.
Binary file added docling_ibm_models/slanet_1m/Fonts/arialbi.ttf
Binary file not shown.
Binary file added docling_ibm_models/slanet_1m/Fonts/ariali.ttf
Binary file not shown.
Binary file added docling_ibm_models/slanet_1m/Fonts/ariblk.ttf
Binary file not shown.
19 changes: 19 additions & 0 deletions docling_ibm_models/slanet_1m/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SLANet_1M

- Install PaddlePaddle with CUDA 12.3

```bash linenums="1"
python -m pip install paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
```

- Then install the remaining Python dependencies:
```bash linenums="1"
pip install -r requirements.txt
```

- To train:
```bash linenums="1"
python train.py -c configs/SLANet_1M.yml -o Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True
```

A model pre-trained on PubTabNet + SynthTabNet can be found [here](https://drive.google.com/drive/folders/1aIzP3a3Ci0n9hXD2j57Dq4uCfQlt8yoW?usp=drive_link).
Empty file.
145 changes: 145 additions & 0 deletions docling_ibm_models/slanet_1m/configs/SLANet_1M.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# Run-wide settings: device, schedule length, logging and checkpoint cadence,
# output locations, and label-processing values.
# The anchors defined here (&max_text_length, &box_format) are reused by
# later sections of this file through aliases.
Global:
  use_gpu: true
  epoch_num: 50
  log_smooth_window: 20
  print_batch_step: 20
  save_model_dir: ./output/SLANet_1M
  # NOTE(review): 400 is larger than epoch_num (50); presumably this means no
  # periodic per-epoch snapshot is ever written -- confirm this is intended.
  save_epoch_step: 400
  # evaluation is run every 2000 iterations after the 0th iteration
  eval_batch_step: [0, 2000]
  cal_metric_during_train: true
  # No value is configured for the next two keys; written as explicit null
  # instead of a bare empty value.
  pretrained_model: null
  checkpoints: null
  save_inference_dir: ./output/SLANet_1M/infer
  use_visualdl: false
  # No default inference image is configured.
  infer_img: null
  # for data or label process
  character_dict_path: dict/table_structure_dict.txt
  character_type: en
  max_text_length: &max_text_length 500
  box_format: &box_format 'xyxy'  # 'xywh', 'xyxy', 'xyxyxyxy'
  infer_mode: false
  use_sync_bn: true
  save_res_path: 'output/infer'
  d2s_train_image_shape: [3, -1, -1]
  amp_custom_white_list: ['concat', 'elementwise_sub', 'set_value']

# Optimizer selection, learning-rate schedule, and weight regularizer.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  clip_norm: 5.0
  lr:
    name: Piecewise
    learning_rate: 0.001
    # Two epoch boundaries pair with three rates.
    # NOTE(review): presumably 0.001 up to epoch 29, 0.0001 up to epoch 39 and
    # 0.00005 afterwards -- confirm boundary handling against the scheduler.
    decay_epochs: [29, 39]
    values: [0.001, 0.0001, 0.00005]
  regularizer:
    name: 'L2'
    # The factor is zero.
    # NOTE(review): presumably this disables the L2 penalty in practice --
    # confirm.
    factor: 0.00000

# Model definition: a backbone, a neck and a head, each selected by `name`
# and configured by its sibling keys.
Architecture:
  model_type: table
  algorithm: SLANet
  Backbone:
    name: PPLCNet
    scale: 1.0
    pretrained: true
    use_ssld: true
  Neck:
    name: CSPPAN
    out_channels: 96
  Head:
    name: SLAHead
    hidden_size: 256
    # Alias of the anchor defined under Global.
    max_text_length: *max_text_length
    # Anchor reused by Metric and by TableLabelEncode in Train and Eval.
    # NOTE(review): 4 presumably matches the four coordinates of an 'xyxy'
    # box -- confirm if box_format is ever changed.
    loc_reg_num: &loc_reg_num 4

# Training loss selection, with separate weights for the structure and
# location terms and the choice of location loss.
Loss:
  name: SLALoss
  structure_weight: 1.0
  loc_weight: 2.0
  loc_loss: smooth_l1

# Decoder applied to the model output.
PostProcess:
  name: TableLabelDecode
  # Anchor reused by TableLabelEncode in Train and Eval, so that encoding and
  # decoding are configured with the same value.
  merge_no_span_structure: &merge_no_span_structure true

# Evaluation metric; `main_indicator` names the reported value used as the
# primary score.
Metric:
  name: TableMetric
  main_indicator: acc
  compute_bbox_metric: false
  # Aliases of the anchors defined under Architecture.Head and Global.
  loc_reg_num: *loc_reg_num
  box_format: *box_format

# Training data: where samples and labels live, the ordered preprocessing
# pipeline applied to each sample, and the data-loader settings.
Train:
  dataset:
    name: PubTabDataSet
    data_dir: data/final_merged/train/
    label_file_list: [data/final_merged/train_annotations.jsonl]
    # Each list item is a single-key mapping: operator name -> its arguments.
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - TableLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: false
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          in_box_format: *box_format
          out_box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          # Plain scalar, not a YAML number, so it loads as the string
          # "1./255.".
          # NOTE(review): presumably evaluated by the operator to 1/255 --
          # confirm.
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - PaddingTableImage:
          size: [488, 488]
      # No arguments are configured for this operator.
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
  loader:
    shuffle: true
    batch_size_per_card: 72
    drop_last: true
    num_workers: 1

# Evaluation data: same preprocessing pipeline as Train, reading the
# validation split, with shuffling and drop_last turned off.
Eval:
  dataset:
    name: PubTabDataSet
    data_dir: data/final_merged/val/
    label_file_list: [data/final_merged/val_annotations.jsonl]
    # Each list item is a single-key mapping: operator name -> its arguments.
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - TableLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: false
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          in_box_format: *box_format
          out_box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          # Plain scalar, not a YAML number, so it loads as the string
          # "1./255.".
          # NOTE(review): presumably evaluated by the operator to 1/255 --
          # confirm.
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - PaddingTableImage:
          size: [488, 488]
      # No arguments are configured for this operator.
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
  loader:
    shuffle: false
    batch_size_per_card: 72
    drop_last: false
    num_workers: 1
28 changes: 28 additions & 0 deletions docling_ibm_models/slanet_1m/dict/table_structure_dict.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<thead>
<tr>
<td>
</td>
</tr>
</thead>
<tbody>
</tbody>
<td
colspan="5"
>
colspan="2"
colspan="3"
rowspan="2"
colspan="4"
colspan="6"
rowspan="3"
colspan="9"
colspan="10"
colspan="7"
rowspan="4"
rowspan="5"
rowspan="9"
colspan="8"
rowspan="8"
rowspan="6"
rowspan="7"
rowspan="10"
95 changes: 95 additions & 0 deletions docling_ibm_models/slanet_1m/dict_table/en_dict.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
0
1
2
3
4
5
6
7
8
9
:
;
<
=
>
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
[
\
]
^
_
`
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
{
|
}
~
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/

Loading