diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/docling-ibm-models_slanet_1m.iml b/.idea/docling-ibm-models_slanet_1m.iml
new file mode 100644
index 0000000..266b601
--- /dev/null
+++ b/.idea/docling-ibm-models_slanet_1m.iml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..812ab5a
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..e796249
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docling_ibm_models/slanet_1m/.gitignore b/docling_ibm_models/slanet_1m/.gitignore
new file mode 100644
index 0000000..85db5a4
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/.gitignore
@@ -0,0 +1,34 @@
+## Python
+
+# Environments
+.venv
+venv
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+
+# Pytest cache
+.pytest_cache
+
+# Pytest Coverage
+.coverage
+
+## IntelliJ's IDEs
+
+.idea
+
+## Visual Studio Code
+
+.vscode
+
+## macOS
+
+.DS_Store
+
+
+inference/
+inference_results/
+output/
+data/
+/data
+evaluation/
diff --git a/docling_ibm_models/slanet_1m/12_tables/12_table_1.jpg b/docling_ibm_models/slanet_1m/12_tables/12_table_1.jpg
new file mode 100644
index 0000000..abdbdcc
Binary files /dev/null and b/docling_ibm_models/slanet_1m/12_tables/12_table_1.jpg differ
diff --git a/docling_ibm_models/slanet_1m/12_tables/12_table_2.jpg b/docling_ibm_models/slanet_1m/12_tables/12_table_2.jpg
new file mode 100644
index 0000000..1d5ffd6
Binary files /dev/null and b/docling_ibm_models/slanet_1m/12_tables/12_table_2.jpg differ
diff --git a/docling_ibm_models/slanet_1m/Fonts/ARIALN.TTF b/docling_ibm_models/slanet_1m/Fonts/ARIALN.TTF
new file mode 100644
index 0000000..94907a3
Binary files /dev/null and b/docling_ibm_models/slanet_1m/Fonts/ARIALN.TTF differ
diff --git a/docling_ibm_models/slanet_1m/Fonts/ARIALNB.TTF b/docling_ibm_models/slanet_1m/Fonts/ARIALNB.TTF
new file mode 100644
index 0000000..62437f0
Binary files /dev/null and b/docling_ibm_models/slanet_1m/Fonts/ARIALNB.TTF differ
diff --git a/docling_ibm_models/slanet_1m/Fonts/ARIALNBI.TTF b/docling_ibm_models/slanet_1m/Fonts/ARIALNBI.TTF
new file mode 100644
index 0000000..d3f019a
Binary files /dev/null and b/docling_ibm_models/slanet_1m/Fonts/ARIALNBI.TTF differ
diff --git a/docling_ibm_models/slanet_1m/Fonts/ARIALNI.TTF b/docling_ibm_models/slanet_1m/Fonts/ARIALNI.TTF
new file mode 100644
index 0000000..4acd468
Binary files /dev/null and b/docling_ibm_models/slanet_1m/Fonts/ARIALNI.TTF differ
diff --git a/docling_ibm_models/slanet_1m/Fonts/arial.ttf b/docling_ibm_models/slanet_1m/Fonts/arial.ttf
new file mode 100644
index 0000000..27372d9
Binary files /dev/null and b/docling_ibm_models/slanet_1m/Fonts/arial.ttf differ
diff --git a/docling_ibm_models/slanet_1m/Fonts/arialbd.ttf b/docling_ibm_models/slanet_1m/Fonts/arialbd.ttf
new file mode 100644
index 0000000..03bb5e2
Binary files /dev/null and b/docling_ibm_models/slanet_1m/Fonts/arialbd.ttf differ
diff --git a/docling_ibm_models/slanet_1m/Fonts/arialbi.ttf b/docling_ibm_models/slanet_1m/Fonts/arialbi.ttf
new file mode 100644
index 0000000..dc80b4e
Binary files /dev/null and b/docling_ibm_models/slanet_1m/Fonts/arialbi.ttf differ
diff --git a/docling_ibm_models/slanet_1m/Fonts/ariali.ttf b/docling_ibm_models/slanet_1m/Fonts/ariali.ttf
new file mode 100644
index 0000000..652df71
Binary files /dev/null and b/docling_ibm_models/slanet_1m/Fonts/ariali.ttf differ
diff --git a/docling_ibm_models/slanet_1m/Fonts/ariblk.ttf b/docling_ibm_models/slanet_1m/Fonts/ariblk.ttf
new file mode 100644
index 0000000..e7ae345
Binary files /dev/null and b/docling_ibm_models/slanet_1m/Fonts/ariblk.ttf differ
diff --git a/docling_ibm_models/slanet_1m/README.md b/docling_ibm_models/slanet_1m/README.md
new file mode 100644
index 0000000..d1db72b
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/README.md
@@ -0,0 +1,19 @@
+# SLANet_1M
+
+- Install PaddlePaddle with CUDA 12.3
+
+ ```bash linenums="1"
+ python -m pip install paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
+ ```
+
+- Then
+ ```bash linenums="1"
+ pip install -r requirements.txt
+ ```
+
+- To train:
+ ```bash linenums="1"
+ python train.py -c configs/SLANet_1M.yml -o Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True
+ ```
+
+Pre-trained Model on PubTabNet + SynthTabNet can be found [here](https://drive.google.com/drive/folders/1aIzP3a3Ci0n9hXD2j57Dq4uCfQlt8yoW?usp=drive_link)
\ No newline at end of file
diff --git a/docling_ibm_models/slanet_1m/__init__.py b/docling_ibm_models/slanet_1m/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/docling_ibm_models/slanet_1m/configs/SLANet_1M.yml b/docling_ibm_models/slanet_1m/configs/SLANet_1M.yml
new file mode 100644
index 0000000..946daf8
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/configs/SLANet_1M.yml
@@ -0,0 +1,145 @@
+Global:
+ use_gpu: true
+ epoch_num: 50
+ log_smooth_window: 20
+ print_batch_step: 20
+ save_model_dir: ./output/SLANet_1M
+ save_epoch_step: 400
+  # evaluation is run every 2000 iterations after the 0th iteration
+ eval_batch_step: [0, 2000]
+ cal_metric_during_train: True
+ pretrained_model:
+ checkpoints:
+ save_inference_dir: ./output/SLANet_1M/infer
+ use_visualdl: False
+ infer_img:
+ # for data or label process
+ character_dict_path: dict/table_structure_dict.txt
+ character_type: en
+ max_text_length: &max_text_length 500
+ box_format: &box_format 'xyxy' # 'xywh', 'xyxy', 'xyxyxyxy'
+ infer_mode: False
+ use_sync_bn: True
+ save_res_path: 'output/infer'
+ d2s_train_image_shape: [3, -1, -1]
+ amp_custom_white_list: ['concat', 'elementwise_sub', 'set_value']
+
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ clip_norm: 5.0
+ lr:
+ name: Piecewise
+ learning_rate: 0.001
+ decay_epochs : [29, 39]
+ values : [0.001, 0.0001, 0.00005]
+ regularizer:
+ name: 'L2'
+ factor: 0.00000
+
+Architecture:
+ model_type: table
+ algorithm: SLANet
+ Backbone:
+ name: PPLCNet
+ scale: 1.0
+ pretrained: true
+ use_ssld: true
+ Neck:
+ name: CSPPAN
+ out_channels: 96
+ Head:
+ name: SLAHead
+ hidden_size: 256
+ max_text_length: *max_text_length
+ loc_reg_num: &loc_reg_num 4
+
+Loss:
+ name: SLALoss
+ structure_weight: 1.0
+ loc_weight: 2.0
+ loc_loss: smooth_l1
+
+PostProcess:
+ name: TableLabelDecode
+ merge_no_span_structure: &merge_no_span_structure True
+
+Metric:
+ name: TableMetric
+ main_indicator: acc
+ compute_bbox_metric: False
+ loc_reg_num: *loc_reg_num
+ box_format: *box_format
+
+Train:
+ dataset:
+ name: PubTabDataSet
+ data_dir: data/final_merged/train/
+ label_file_list: [data/final_merged/train_annotations.jsonl]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - TableLabelEncode:
+ learn_empty_box: False
+ merge_no_span_structure: *merge_no_span_structure
+ replace_empty_cell_token: False
+ loc_reg_num: *loc_reg_num
+ max_text_length: *max_text_length
+ - TableBoxEncode:
+ in_box_format: *box_format
+ out_box_format: *box_format
+ - ResizeTableImage:
+ max_len: 488
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ order: 'hwc'
+ - PaddingTableImage:
+ size: [488, 488]
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
+ loader:
+ shuffle: True
+ batch_size_per_card: 72
+ drop_last: True
+ num_workers: 1
+
+Eval:
+ dataset:
+ name: PubTabDataSet
+ data_dir: data/final_merged/val/
+ label_file_list: [data/final_merged/val_annotations.jsonl]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - TableLabelEncode:
+ learn_empty_box: False
+ merge_no_span_structure: *merge_no_span_structure
+ replace_empty_cell_token: False
+ loc_reg_num: *loc_reg_num
+ max_text_length: *max_text_length
+ - TableBoxEncode:
+ in_box_format: *box_format
+ out_box_format: *box_format
+ - ResizeTableImage:
+ max_len: 488
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ order: 'hwc'
+ - PaddingTableImage:
+ size: [488, 488]
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 72
+ num_workers: 1
diff --git a/docling_ibm_models/slanet_1m/dict/table_structure_dict.txt b/docling_ibm_models/slanet_1m/dict/table_structure_dict.txt
new file mode 100644
index 0000000..fec6f7d
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/dict/table_structure_dict.txt
@@ -0,0 +1,28 @@
+
+
+
+ |
+
+
+
+
+
+ colspan="2"
+ colspan="3"
+ rowspan="2"
+ colspan="4"
+ colspan="6"
+ rowspan="3"
+ colspan="9"
+ colspan="10"
+ colspan="7"
+ rowspan="4"
+ rowspan="5"
+ rowspan="9"
+ colspan="8"
+ rowspan="8"
+ rowspan="6"
+ rowspan="7"
+ rowspan="10"
diff --git a/docling_ibm_models/slanet_1m/dict_table/en_dict.txt b/docling_ibm_models/slanet_1m/dict_table/en_dict.txt
new file mode 100644
index 0000000..7677d31
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/dict_table/en_dict.txt
@@ -0,0 +1,95 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+[
+\
+]
+^
+_
+`
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+
diff --git a/docling_ibm_models/slanet_1m/dict_table/ppocr_keys_v1.txt b/docling_ibm_models/slanet_1m/dict_table/ppocr_keys_v1.txt
new file mode 100644
index 0000000..b75af21
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/dict_table/ppocr_keys_v1.txt
@@ -0,0 +1,6623 @@
+'
+疗
+绚
+诚
+娇
+溜
+题
+贿
+者
+廖
+更
+纳
+加
+奉
+公
+一
+就
+汴
+计
+与
+路
+房
+原
+妇
+2
+0
+8
+-
+7
+其
+>
+:
+]
+,
+,
+骑
+刈
+全
+消
+昏
+傈
+安
+久
+钟
+嗅
+不
+影
+处
+驽
+蜿
+资
+关
+椤
+地
+瘸
+专
+问
+忖
+票
+嫉
+炎
+韵
+要
+月
+田
+节
+陂
+鄙
+捌
+备
+拳
+伺
+眼
+网
+盎
+大
+傍
+心
+东
+愉
+汇
+蹿
+科
+每
+业
+里
+航
+晏
+字
+平
+录
+先
+1
+3
+彤
+鲶
+产
+稍
+督
+腴
+有
+象
+岳
+注
+绍
+在
+泺
+文
+定
+核
+名
+水
+过
+理
+让
+偷
+率
+等
+这
+发
+”
+为
+含
+肥
+酉
+相
+鄱
+七
+编
+猥
+锛
+日
+镀
+蒂
+掰
+倒
+辆
+栾
+栗
+综
+涩
+州
+雌
+滑
+馀
+了
+机
+块
+司
+宰
+甙
+兴
+矽
+抚
+保
+用
+沧
+秩
+如
+收
+息
+滥
+页
+疑
+埠
+!
+!
+姥
+异
+橹
+钇
+向
+下
+跄
+的
+椴
+沫
+国
+绥
+獠
+报
+开
+民
+蜇
+何
+分
+凇
+长
+讥
+藏
+掏
+施
+羽
+中
+讲
+派
+嘟
+人
+提
+浼
+间
+世
+而
+古
+多
+倪
+唇
+饯
+控
+庚
+首
+赛
+蜓
+味
+断
+制
+觉
+技
+替
+艰
+溢
+潮
+夕
+钺
+外
+摘
+枋
+动
+双
+单
+啮
+户
+枇
+确
+锦
+曜
+杜
+或
+能
+效
+霜
+盒
+然
+侗
+电
+晁
+放
+步
+鹃
+新
+杖
+蜂
+吒
+濂
+瞬
+评
+总
+隍
+对
+独
+合
+也
+是
+府
+青
+天
+诲
+墙
+组
+滴
+级
+邀
+帘
+示
+已
+时
+骸
+仄
+泅
+和
+遨
+店
+雇
+疫
+持
+巍
+踮
+境
+只
+亨
+目
+鉴
+崤
+闲
+体
+泄
+杂
+作
+般
+轰
+化
+解
+迂
+诿
+蛭
+璀
+腾
+告
+版
+服
+省
+师
+小
+规
+程
+线
+海
+办
+引
+二
+桧
+牌
+砺
+洄
+裴
+修
+图
+痫
+胡
+许
+犊
+事
+郛
+基
+柴
+呼
+食
+研
+奶
+律
+蛋
+因
+葆
+察
+戏
+褒
+戒
+再
+李
+骁
+工
+貂
+油
+鹅
+章
+啄
+休
+场
+给
+睡
+纷
+豆
+器
+捎
+说
+敏
+学
+会
+浒
+设
+诊
+格
+廓
+查
+来
+霓
+室
+溆
+¢
+诡
+寥
+焕
+舜
+柒
+狐
+回
+戟
+砾
+厄
+实
+翩
+尿
+五
+入
+径
+惭
+喹
+股
+宇
+篝
+|
+;
+美
+期
+云
+九
+祺
+扮
+靠
+锝
+槌
+系
+企
+酰
+阊
+暂
+蚕
+忻
+豁
+本
+羹
+执
+条
+钦
+H
+獒
+限
+进
+季
+楦
+于
+芘
+玖
+铋
+茯
+未
+答
+粘
+括
+样
+精
+欠
+矢
+甥
+帷
+嵩
+扣
+令
+仔
+风
+皈
+行
+支
+部
+蓉
+刮
+站
+蜡
+救
+钊
+汗
+松
+嫌
+成
+可
+.
+鹤
+院
+从
+交
+政
+怕
+活
+调
+球
+局
+验
+髌
+第
+韫
+谗
+串
+到
+圆
+年
+米
+/
+*
+友
+忿
+检
+区
+看
+自
+敢
+刃
+个
+兹
+弄
+流
+留
+同
+没
+齿
+星
+聆
+轼
+湖
+什
+三
+建
+蛔
+儿
+椋
+汕
+震
+颧
+鲤
+跟
+力
+情
+璺
+铨
+陪
+务
+指
+族
+训
+滦
+鄣
+濮
+扒
+商
+箱
+十
+召
+慷
+辗
+所
+莞
+管
+护
+臭
+横
+硒
+嗓
+接
+侦
+六
+露
+党
+馋
+驾
+剖
+高
+侬
+妪
+幂
+猗
+绺
+骐
+央
+酐
+孝
+筝
+课
+徇
+缰
+门
+男
+西
+项
+句
+谙
+瞒
+秃
+篇
+教
+碲
+罚
+声
+呐
+景
+前
+富
+嘴
+鳌
+稀
+免
+朋
+啬
+睐
+去
+赈
+鱼
+住
+肩
+愕
+速
+旁
+波
+厅
+健
+茼
+厥
+鲟
+谅
+投
+攸
+炔
+数
+方
+击
+呋
+谈
+绩
+别
+愫
+僚
+躬
+鹧
+胪
+炳
+招
+喇
+膨
+泵
+蹦
+毛
+结
+5
+4
+谱
+识
+陕
+粽
+婚
+拟
+构
+且
+搜
+任
+潘
+比
+郢
+妨
+醪
+陀
+桔
+碘
+扎
+选
+哈
+骷
+楷
+亿
+明
+缆
+脯
+监
+睫
+逻
+婵
+共
+赴
+淝
+凡
+惦
+及
+达
+揖
+谩
+澹
+减
+焰
+蛹
+番
+祁
+柏
+员
+禄
+怡
+峤
+龙
+白
+叽
+生
+闯
+起
+细
+装
+谕
+竟
+聚
+钙
+上
+导
+渊
+按
+艾
+辘
+挡
+耒
+盹
+饪
+臀
+记
+邮
+蕙
+受
+各
+医
+搂
+普
+滇
+朗
+茸
+带
+翻
+酚
+(
+光
+堤
+墟
+蔷
+万
+幻
+〓
+瑙
+辈
+昧
+盏
+亘
+蛀
+吉
+铰
+请
+子
+假
+闻
+税
+井
+诩
+哨
+嫂
+好
+面
+琐
+校
+馊
+鬣
+缂
+营
+访
+炖
+占
+农
+缀
+否
+经
+钚
+棵
+趟
+张
+亟
+吏
+茶
+谨
+捻
+论
+迸
+堂
+玉
+信
+吧
+瞠
+乡
+姬
+寺
+咬
+溏
+苄
+皿
+意
+赉
+宝
+尔
+钰
+艺
+特
+唳
+踉
+都
+荣
+倚
+登
+荐
+丧
+奇
+涵
+批
+炭
+近
+符
+傩
+感
+道
+着
+菊
+虹
+仲
+众
+懈
+濯
+颞
+眺
+南
+释
+北
+缝
+标
+既
+茗
+整
+撼
+迤
+贲
+挎
+耱
+拒
+某
+妍
+卫
+哇
+英
+矶
+藩
+治
+他
+元
+领
+膜
+遮
+穗
+蛾
+飞
+荒
+棺
+劫
+么
+市
+火
+温
+拈
+棚
+洼
+转
+果
+奕
+卸
+迪
+伸
+泳
+斗
+邡
+侄
+涨
+屯
+萋
+胭
+氡
+崮
+枞
+惧
+冒
+彩
+斜
+手
+豚
+随
+旭
+淑
+妞
+形
+菌
+吲
+沱
+争
+驯
+歹
+挟
+兆
+柱
+传
+至
+包
+内
+响
+临
+红
+功
+弩
+衡
+寂
+禁
+老
+棍
+耆
+渍
+织
+害
+氵
+渑
+布
+载
+靥
+嗬
+虽
+苹
+咨
+娄
+库
+雉
+榜
+帜
+嘲
+套
+瑚
+亲
+簸
+欧
+边
+6
+腿
+旮
+抛
+吹
+瞳
+得
+镓
+梗
+厨
+继
+漾
+愣
+憨
+士
+策
+窑
+抑
+躯
+襟
+脏
+参
+贸
+言
+干
+绸
+鳄
+穷
+藜
+音
+折
+详
+)
+举
+悍
+甸
+癌
+黎
+谴
+死
+罩
+迁
+寒
+驷
+袖
+媒
+蒋
+掘
+模
+纠
+恣
+观
+祖
+蛆
+碍
+位
+稿
+主
+澧
+跌
+筏
+京
+锏
+帝
+贴
+证
+糠
+才
+黄
+鲸
+略
+炯
+饱
+四
+出
+园
+犀
+牧
+容
+汉
+杆
+浈
+汰
+瑷
+造
+虫
+瘩
+怪
+驴
+济
+应
+花
+沣
+谔
+夙
+旅
+价
+矿
+以
+考
+s
+u
+呦
+晒
+巡
+茅
+准
+肟
+瓴
+詹
+仟
+褂
+译
+桌
+混
+宁
+怦
+郑
+抿
+些
+余
+鄂
+饴
+攒
+珑
+群
+阖
+岔
+琨
+藓
+预
+环
+洮
+岌
+宀
+杲
+瀵
+最
+常
+囡
+周
+踊
+女
+鼓
+袭
+喉
+简
+范
+薯
+遐
+疏
+粱
+黜
+禧
+法
+箔
+斤
+遥
+汝
+奥
+直
+贞
+撑
+置
+绱
+集
+她
+馅
+逗
+钧
+橱
+魉
+[
+恙
+躁
+唤
+9
+旺
+膘
+待
+脾
+惫
+购
+吗
+依
+盲
+度
+瘿
+蠖
+俾
+之
+镗
+拇
+鲵
+厝
+簧
+续
+款
+展
+啃
+表
+剔
+品
+钻
+腭
+损
+清
+锶
+统
+涌
+寸
+滨
+贪
+链
+吠
+冈
+伎
+迥
+咏
+吁
+览
+防
+迅
+失
+汾
+阔
+逵
+绀
+蔑
+列
+川
+凭
+努
+熨
+揪
+利
+俱
+绉
+抢
+鸨
+我
+即
+责
+膦
+易
+毓
+鹊
+刹
+玷
+岿
+空
+嘞
+绊
+排
+术
+估
+锷
+违
+们
+苟
+铜
+播
+肘
+件
+烫
+审
+鲂
+广
+像
+铌
+惰
+铟
+巳
+胍
+鲍
+康
+憧
+色
+恢
+想
+拷
+尤
+疳
+知
+S
+Y
+F
+D
+A
+峄
+裕
+帮
+握
+搔
+氐
+氘
+难
+墒
+沮
+雨
+叁
+缥
+悴
+藐
+湫
+娟
+苑
+稠
+颛
+簇
+后
+阕
+闭
+蕤
+缚
+怎
+佞
+码
+嘤
+蔡
+痊
+舱
+螯
+帕
+赫
+昵
+升
+烬
+岫
+、
+疵
+蜻
+髁
+蕨
+隶
+烛
+械
+丑
+盂
+梁
+强
+鲛
+由
+拘
+揉
+劭
+龟
+撤
+钩
+呕
+孛
+费
+妻
+漂
+求
+阑
+崖
+秤
+甘
+通
+深
+补
+赃
+坎
+床
+啪
+承
+吼
+量
+暇
+钼
+烨
+阂
+擎
+脱
+逮
+称
+P
+神
+属
+矗
+华
+届
+狍
+葑
+汹
+育
+患
+窒
+蛰
+佼
+静
+槎
+运
+鳗
+庆
+逝
+曼
+疱
+克
+代
+官
+此
+麸
+耧
+蚌
+晟
+例
+础
+榛
+副
+测
+唰
+缢
+迹
+灬
+霁
+身
+岁
+赭
+扛
+又
+菡
+乜
+雾
+板
+读
+陷
+徉
+贯
+郁
+虑
+变
+钓
+菜
+圾
+现
+琢
+式
+乐
+维
+渔
+浜
+左
+吾
+脑
+钡
+警
+T
+啵
+拴
+偌
+漱
+湿
+硕
+止
+骼
+魄
+积
+燥
+联
+踢
+玛
+则
+窿
+见
+振
+畿
+送
+班
+钽
+您
+赵
+刨
+印
+讨
+踝
+籍
+谡
+舌
+崧
+汽
+蔽
+沪
+酥
+绒
+怖
+财
+帖
+肱
+私
+莎
+勋
+羔
+霸
+励
+哼
+帐
+将
+帅
+渠
+纪
+婴
+娩
+岭
+厘
+滕
+吻
+伤
+坝
+冠
+戊
+隆
+瘁
+介
+涧
+物
+黍
+并
+姗
+奢
+蹑
+掣
+垸
+锴
+命
+箍
+捉
+病
+辖
+琰
+眭
+迩
+艘
+绌
+繁
+寅
+若
+毋
+思
+诉
+类
+诈
+燮
+轲
+酮
+狂
+重
+反
+职
+筱
+县
+委
+磕
+绣
+奖
+晋
+濉
+志
+徽
+肠
+呈
+獐
+坻
+口
+片
+碰
+几
+村
+柿
+劳
+料
+获
+亩
+惕
+晕
+厌
+号
+罢
+池
+正
+鏖
+煨
+家
+棕
+复
+尝
+懋
+蜥
+锅
+岛
+扰
+队
+坠
+瘾
+钬
+@
+卧
+疣
+镇
+譬
+冰
+彷
+频
+黯
+据
+垄
+采
+八
+缪
+瘫
+型
+熹
+砰
+楠
+襁
+箐
+但
+嘶
+绳
+啤
+拍
+盥
+穆
+傲
+洗
+盯
+塘
+怔
+筛
+丿
+台
+恒
+喂
+葛
+永
+¥
+烟
+酒
+桦
+书
+砂
+蚝
+缉
+态
+瀚
+袄
+圳
+轻
+蛛
+超
+榧
+遛
+姒
+奘
+铮
+右
+荽
+望
+偻
+卡
+丶
+氰
+附
+做
+革
+索
+戚
+坨
+桷
+唁
+垅
+榻
+岐
+偎
+坛
+莨
+山
+殊
+微
+骇
+陈
+爨
+推
+嗝
+驹
+澡
+藁
+呤
+卤
+嘻
+糅
+逛
+侵
+郓
+酌
+德
+摇
+※
+鬃
+被
+慨
+殡
+羸
+昌
+泡
+戛
+鞋
+河
+宪
+沿
+玲
+鲨
+翅
+哽
+源
+铅
+语
+照
+邯
+址
+荃
+佬
+顺
+鸳
+町
+霭
+睾
+瓢
+夸
+椁
+晓
+酿
+痈
+咔
+侏
+券
+噎
+湍
+签
+嚷
+离
+午
+尚
+社
+锤
+背
+孟
+使
+浪
+缦
+潍
+鞅
+军
+姹
+驶
+笑
+鳟
+鲁
+》
+孽
+钜
+绿
+洱
+礴
+焯
+椰
+颖
+囔
+乌
+孔
+巴
+互
+性
+椽
+哞
+聘
+昨
+早
+暮
+胶
+炀
+隧
+低
+彗
+昝
+铁
+呓
+氽
+藉
+喔
+癖
+瑗
+姨
+权
+胱
+韦
+堑
+蜜
+酋
+楝
+砝
+毁
+靓
+歙
+锲
+究
+屋
+喳
+骨
+辨
+碑
+武
+鸠
+宫
+辜
+烊
+适
+坡
+殃
+培
+佩
+供
+走
+蜈
+迟
+翼
+况
+姣
+凛
+浔
+吃
+飘
+债
+犟
+金
+促
+苛
+崇
+坂
+莳
+畔
+绂
+兵
+蠕
+斋
+根
+砍
+亢
+欢
+恬
+崔
+剁
+餐
+榫
+快
+扶
+‖
+濒
+缠
+鳜
+当
+彭
+驭
+浦
+篮
+昀
+锆
+秸
+钳
+弋
+娣
+瞑
+夷
+龛
+苫
+拱
+致
+%
+嵊
+障
+隐
+弑
+初
+娓
+抉
+汩
+累
+蓖
+"
+唬
+助
+苓
+昙
+押
+毙
+破
+城
+郧
+逢
+嚏
+獭
+瞻
+溱
+婿
+赊
+跨
+恼
+璧
+萃
+姻
+貉
+灵
+炉
+密
+氛
+陶
+砸
+谬
+衔
+点
+琛
+沛
+枳
+层
+岱
+诺
+脍
+榈
+埂
+征
+冷
+裁
+打
+蹴
+素
+瘘
+逞
+蛐
+聊
+激
+腱
+萘
+踵
+飒
+蓟
+吆
+取
+咙
+簋
+涓
+矩
+曝
+挺
+揣
+座
+你
+史
+舵
+焱
+尘
+苏
+笈
+脚
+溉
+榨
+诵
+樊
+邓
+焊
+义
+庶
+儋
+蟋
+蒲
+赦
+呷
+杞
+诠
+豪
+还
+试
+颓
+茉
+太
+除
+紫
+逃
+痴
+草
+充
+鳕
+珉
+祗
+墨
+渭
+烩
+蘸
+慕
+璇
+镶
+穴
+嵘
+恶
+骂
+险
+绋
+幕
+碉
+肺
+戳
+刘
+潞
+秣
+纾
+潜
+銮
+洛
+须
+罘
+销
+瘪
+汞
+兮
+屉
+r
+林
+厕
+质
+探
+划
+狸
+殚
+善
+煊
+烹
+〒
+锈
+逯
+宸
+辍
+泱
+柚
+袍
+远
+蹋
+嶙
+绝
+峥
+娥
+缍
+雀
+徵
+认
+镱
+谷
+=
+贩
+勉
+撩
+鄯
+斐
+洋
+非
+祚
+泾
+诒
+饿
+撬
+威
+晷
+搭
+芍
+锥
+笺
+蓦
+候
+琊
+档
+礁
+沼
+卵
+荠
+忑
+朝
+凹
+瑞
+头
+仪
+弧
+孵
+畏
+铆
+突
+衲
+车
+浩
+气
+茂
+悖
+厢
+枕
+酝
+戴
+湾
+邹
+飚
+攘
+锂
+写
+宵
+翁
+岷
+无
+喜
+丈
+挑
+嗟
+绛
+殉
+议
+槽
+具
+醇
+淞
+笃
+郴
+阅
+饼
+底
+壕
+砚
+弈
+询
+缕
+庹
+翟
+零
+筷
+暨
+舟
+闺
+甯
+撞
+麂
+茌
+蔼
+很
+珲
+捕
+棠
+角
+阉
+媛
+娲
+诽
+剿
+尉
+爵
+睬
+韩
+诰
+匣
+危
+糍
+镯
+立
+浏
+阳
+少
+盆
+舔
+擘
+匪
+申
+尬
+铣
+旯
+抖
+赘
+瓯
+居
+ˇ
+哮
+游
+锭
+茏
+歌
+坏
+甚
+秒
+舞
+沙
+仗
+劲
+潺
+阿
+燧
+郭
+嗖
+霏
+忠
+材
+奂
+耐
+跺
+砀
+输
+岖
+媳
+氟
+极
+摆
+灿
+今
+扔
+腻
+枝
+奎
+药
+熄
+吨
+话
+q
+额
+慑
+嘌
+协
+喀
+壳
+埭
+视
+著
+於
+愧
+陲
+翌
+峁
+颅
+佛
+腹
+聋
+侯
+咎
+叟
+秀
+颇
+存
+较
+罪
+哄
+岗
+扫
+栏
+钾
+羌
+己
+璨
+枭
+霉
+煌
+涸
+衿
+键
+镝
+益
+岢
+奏
+连
+夯
+睿
+冥
+均
+糖
+狞
+蹊
+稻
+爸
+刿
+胥
+煜
+丽
+肿
+璃
+掸
+跚
+灾
+垂
+樾
+濑
+乎
+莲
+窄
+犹
+撮
+战
+馄
+软
+络
+显
+鸢
+胸
+宾
+妲
+恕
+埔
+蝌
+份
+遇
+巧
+瞟
+粒
+恰
+剥
+桡
+博
+讯
+凯
+堇
+阶
+滤
+卖
+斌
+骚
+彬
+兑
+磺
+樱
+舷
+两
+娱
+福
+仃
+差
+找
+桁
+÷
+净
+把
+阴
+污
+戬
+雷
+碓
+蕲
+楚
+罡
+焖
+抽
+妫
+咒
+仑
+闱
+尽
+邑
+菁
+爱
+贷
+沥
+鞑
+牡
+嗉
+崴
+骤
+塌
+嗦
+订
+拮
+滓
+捡
+锻
+次
+坪
+杩
+臃
+箬
+融
+珂
+鹗
+宗
+枚
+降
+鸬
+妯
+阄
+堰
+盐
+毅
+必
+杨
+崃
+俺
+甬
+状
+莘
+货
+耸
+菱
+腼
+铸
+唏
+痤
+孚
+澳
+懒
+溅
+翘
+疙
+杷
+淼
+缙
+骰
+喊
+悉
+砻
+坷
+艇
+赁
+界
+谤
+纣
+宴
+晃
+茹
+归
+饭
+梢
+铡
+街
+抄
+肼
+鬟
+苯
+颂
+撷
+戈
+炒
+咆
+茭
+瘙
+负
+仰
+客
+琉
+铢
+封
+卑
+珥
+椿
+镧
+窨
+鬲
+寿
+御
+袤
+铃
+萎
+砖
+餮
+脒
+裳
+肪
+孕
+嫣
+馗
+嵇
+恳
+氯
+江
+石
+褶
+冢
+祸
+阻
+狈
+羞
+银
+靳
+透
+咳
+叼
+敷
+芷
+啥
+它
+瓤
+兰
+痘
+懊
+逑
+肌
+往
+捺
+坊
+甩
+呻
+〃
+沦
+忘
+膻
+祟
+菅
+剧
+崆
+智
+坯
+臧
+霍
+墅
+攻
+眯
+倘
+拢
+骠
+铐
+庭
+岙
+瓠
+′
+缺
+泥
+迢
+捶
+?
+?
+郏
+喙
+掷
+沌
+纯
+秘
+种
+听
+绘
+固
+螨
+团
+香
+盗
+妒
+埚
+蓝
+拖
+旱
+荞
+铀
+血
+遏
+汲
+辰
+叩
+拽
+幅
+硬
+惶
+桀
+漠
+措
+泼
+唑
+齐
+肾
+念
+酱
+虚
+屁
+耶
+旗
+砦
+闵
+婉
+馆
+拭
+绅
+韧
+忏
+窝
+醋
+葺
+顾
+辞
+倜
+堆
+辋
+逆
+玟
+贱
+疾
+董
+惘
+倌
+锕
+淘
+嘀
+莽
+俭
+笏
+绑
+鲷
+杈
+择
+蟀
+粥
+嗯
+驰
+逾
+案
+谪
+褓
+胫
+哩
+昕
+颚
+鲢
+绠
+躺
+鹄
+崂
+儒
+俨
+丝
+尕
+泌
+啊
+萸
+彰
+幺
+吟
+骄
+苣
+弦
+脊
+瑰
+〈
+诛
+镁
+析
+闪
+剪
+侧
+哟
+框
+螃
+守
+嬗
+燕
+狭
+铈
+缮
+概
+迳
+痧
+鲲
+俯
+售
+笼
+痣
+扉
+挖
+满
+咋
+援
+邱
+扇
+歪
+便
+玑
+绦
+峡
+蛇
+叨
+〖
+泽
+胃
+斓
+喋
+怂
+坟
+猪
+该
+蚬
+炕
+弥
+赞
+棣
+晔
+娠
+挲
+狡
+创
+疖
+铕
+镭
+稷
+挫
+弭
+啾
+翔
+粉
+履
+苘
+哦
+楼
+秕
+铂
+土
+锣
+瘟
+挣
+栉
+习
+享
+桢
+袅
+磨
+桂
+谦
+延
+坚
+蔚
+噗
+署
+谟
+猬
+钎
+恐
+嬉
+雒
+倦
+衅
+亏
+璩
+睹
+刻
+殿
+王
+算
+雕
+麻
+丘
+柯
+骆
+丸
+塍
+谚
+添
+鲈
+垓
+桎
+蚯
+芥
+予
+飕
+镦
+谌
+窗
+醚
+菀
+亮
+搪
+莺
+蒿
+羁
+足
+J
+真
+轶
+悬
+衷
+靛
+翊
+掩
+哒
+炅
+掐
+冼
+妮
+l
+谐
+稚
+荆
+擒
+犯
+陵
+虏
+浓
+崽
+刍
+陌
+傻
+孜
+千
+靖
+演
+矜
+钕
+煽
+杰
+酗
+渗
+伞
+栋
+俗
+泫
+戍
+罕
+沾
+疽
+灏
+煦
+芬
+磴
+叱
+阱
+榉
+湃
+蜀
+叉
+醒
+彪
+租
+郡
+篷
+屎
+良
+垢
+隗
+弱
+陨
+峪
+砷
+掴
+颁
+胎
+雯
+绵
+贬
+沐
+撵
+隘
+篙
+暖
+曹
+陡
+栓
+填
+臼
+彦
+瓶
+琪
+潼
+哪
+鸡
+摩
+啦
+俟
+锋
+域
+耻
+蔫
+疯
+纹
+撇
+毒
+绶
+痛
+酯
+忍
+爪
+赳
+歆
+嘹
+辕
+烈
+册
+朴
+钱
+吮
+毯
+癜
+娃
+谀
+邵
+厮
+炽
+璞
+邃
+丐
+追
+词
+瓒
+忆
+轧
+芫
+谯
+喷
+弟
+半
+冕
+裙
+掖
+墉
+绮
+寝
+苔
+势
+顷
+褥
+切
+衮
+君
+佳
+嫒
+蚩
+霞
+佚
+洙
+逊
+镖
+暹
+唛
+&
+殒
+顶
+碗
+獗
+轭
+铺
+蛊
+废
+恹
+汨
+崩
+珍
+那
+杵
+曲
+纺
+夏
+薰
+傀
+闳
+淬
+姘
+舀
+拧
+卷
+楂
+恍
+讪
+厩
+寮
+篪
+赓
+乘
+灭
+盅
+鞣
+沟
+慎
+挂
+饺
+鼾
+杳
+树
+缨
+丛
+絮
+娌
+臻
+嗳
+篡
+侩
+述
+衰
+矛
+圈
+蚜
+匕
+筹
+匿
+濞
+晨
+叶
+骋
+郝
+挚
+蚴
+滞
+增
+侍
+描
+瓣
+吖
+嫦
+蟒
+匾
+圣
+赌
+毡
+癞
+恺
+百
+曳
+需
+篓
+肮
+庖
+帏
+卿
+驿
+遗
+蹬
+鬓
+骡
+歉
+芎
+胳
+屐
+禽
+烦
+晌
+寄
+媾
+狄
+翡
+苒
+船
+廉
+终
+痞
+殇
+々
+畦
+饶
+改
+拆
+悻
+萄
+£
+瓿
+乃
+訾
+桅
+匮
+溧
+拥
+纱
+铍
+骗
+蕃
+龋
+缬
+父
+佐
+疚
+栎
+醍
+掳
+蓄
+x
+惆
+颜
+鲆
+榆
+〔
+猎
+敌
+暴
+谥
+鲫
+贾
+罗
+玻
+缄
+扦
+芪
+癣
+落
+徒
+臾
+恿
+猩
+托
+邴
+肄
+牵
+春
+陛
+耀
+刊
+拓
+蓓
+邳
+堕
+寇
+枉
+淌
+啡
+湄
+兽
+酷
+萼
+碚
+濠
+萤
+夹
+旬
+戮
+梭
+琥
+椭
+昔
+勺
+蜊
+绐
+晚
+孺
+僵
+宣
+摄
+冽
+旨
+萌
+忙
+蚤
+眉
+噼
+蟑
+付
+契
+瓜
+悼
+颡
+壁
+曾
+窕
+颢
+澎
+仿
+俑
+浑
+嵌
+浣
+乍
+碌
+褪
+乱
+蔟
+隙
+玩
+剐
+葫
+箫
+纲
+围
+伐
+决
+伙
+漩
+瑟
+刑
+肓
+镳
+缓
+蹭
+氨
+皓
+典
+畲
+坍
+铑
+檐
+塑
+洞
+倬
+储
+胴
+淳
+戾
+吐
+灼
+惺
+妙
+毕
+珐
+缈
+虱
+盖
+羰
+鸿
+磅
+谓
+髅
+娴
+苴
+唷
+蚣
+霹
+抨
+贤
+唠
+犬
+誓
+逍
+庠
+逼
+麓
+籼
+釉
+呜
+碧
+秧
+氩
+摔
+霄
+穸
+纨
+辟
+妈
+映
+完
+牛
+缴
+嗷
+炊
+恩
+荔
+茆
+掉
+紊
+慌
+莓
+羟
+阙
+萁
+磐
+另
+蕹
+辱
+鳐
+湮
+吡
+吩
+唐
+睦
+垠
+舒
+圜
+冗
+瞿
+溺
+芾
+囱
+匠
+僳
+汐
+菩
+饬
+漓
+黑
+霰
+浸
+濡
+窥
+毂
+蒡
+兢
+驻
+鹉
+芮
+诙
+迫
+雳
+厂
+忐
+臆
+猴
+鸣
+蚪
+栈
+箕
+羡
+渐
+莆
+捍
+眈
+哓
+趴
+蹼
+埕
+嚣
+骛
+宏
+淄
+斑
+噜
+严
+瑛
+垃
+椎
+诱
+压
+庾
+绞
+焘
+廿
+抡
+迄
+棘
+夫
+纬
+锹
+眨
+瞌
+侠
+脐
+竞
+瀑
+孳
+骧
+遁
+姜
+颦
+荪
+滚
+萦
+伪
+逸
+粳
+爬
+锁
+矣
+役
+趣
+洒
+颔
+诏
+逐
+奸
+甭
+惠
+攀
+蹄
+泛
+尼
+拼
+阮
+鹰
+亚
+颈
+惑
+勒
+〉
+际
+肛
+爷
+刚
+钨
+丰
+养
+冶
+鲽
+辉
+蔻
+画
+覆
+皴
+妊
+麦
+返
+醉
+皂
+擀
+〗
+酶
+凑
+粹
+悟
+诀
+硖
+港
+卜
+z
+杀
+涕
+±
+舍
+铠
+抵
+弛
+段
+敝
+镐
+奠
+拂
+轴
+跛
+袱
+e
+t
+沉
+菇
+俎
+薪
+峦
+秭
+蟹
+历
+盟
+菠
+寡
+液
+肢
+喻
+染
+裱
+悱
+抱
+氙
+赤
+捅
+猛
+跑
+氮
+谣
+仁
+尺
+辊
+窍
+烙
+衍
+架
+擦
+倏
+璐
+瑁
+币
+楞
+胖
+夔
+趸
+邛
+惴
+饕
+虔
+蝎
+§
+哉
+贝
+宽
+辫
+炮
+扩
+饲
+籽
+魏
+菟
+锰
+伍
+猝
+末
+琳
+哚
+蛎
+邂
+呀
+姿
+鄞
+却
+歧
+仙
+恸
+椐
+森
+牒
+寤
+袒
+婆
+虢
+雅
+钉
+朵
+贼
+欲
+苞
+寰
+故
+龚
+坭
+嘘
+咫
+礼
+硷
+兀
+睢
+汶
+’
+铲
+烧
+绕
+诃
+浃
+钿
+哺
+柜
+讼
+颊
+璁
+腔
+洽
+咐
+脲
+簌
+筠
+镣
+玮
+鞠
+谁
+兼
+姆
+挥
+梯
+蝴
+谘
+漕
+刷
+躏
+宦
+弼
+b
+垌
+劈
+麟
+莉
+揭
+笙
+渎
+仕
+嗤
+仓
+配
+怏
+抬
+错
+泯
+镊
+孰
+猿
+邪
+仍
+秋
+鼬
+壹
+歇
+吵
+炼
+<
+尧
+射
+柬
+廷
+胧
+霾
+凳
+隋
+肚
+浮
+梦
+祥
+株
+堵
+退
+L
+鹫
+跎
+凶
+毽
+荟
+炫
+栩
+玳
+甜
+沂
+鹿
+顽
+伯
+爹
+赔
+蛴
+徐
+匡
+欣
+狰
+缸
+雹
+蟆
+疤
+默
+沤
+啜
+痂
+衣
+禅
+w
+i
+h
+辽
+葳
+黝
+钗
+停
+沽
+棒
+馨
+颌
+肉
+吴
+硫
+悯
+劾
+娈
+马
+啧
+吊
+悌
+镑
+峭
+帆
+瀣
+涉
+咸
+疸
+滋
+泣
+翦
+拙
+癸
+钥
+蜒
++
+尾
+庄
+凝
+泉
+婢
+渴
+谊
+乞
+陆
+锉
+糊
+鸦
+淮
+I
+B
+N
+晦
+弗
+乔
+庥
+葡
+尻
+席
+橡
+傣
+渣
+拿
+惩
+麋
+斛
+缃
+矮
+蛏
+岘
+鸽
+姐
+膏
+催
+奔
+镒
+喱
+蠡
+摧
+钯
+胤
+柠
+拐
+璋
+鸥
+卢
+荡
+倾
+^
+_
+珀
+逄
+萧
+塾
+掇
+贮
+笆
+聂
+圃
+冲
+嵬
+M
+滔
+笕
+值
+炙
+偶
+蜱
+搐
+梆
+汪
+蔬
+腑
+鸯
+蹇
+敞
+绯
+仨
+祯
+谆
+梧
+糗
+鑫
+啸
+豺
+囹
+猾
+巢
+柄
+瀛
+筑
+踌
+沭
+暗
+苁
+鱿
+蹉
+脂
+蘖
+牢
+热
+木
+吸
+溃
+宠
+序
+泞
+偿
+拜
+檩
+厚
+朐
+毗
+螳
+吞
+媚
+朽
+担
+蝗
+橘
+畴
+祈
+糟
+盱
+隼
+郜
+惜
+珠
+裨
+铵
+焙
+琚
+唯
+咚
+噪
+骊
+丫
+滢
+勤
+棉
+呸
+咣
+淀
+隔
+蕾
+窈
+饨
+挨
+煅
+短
+匙
+粕
+镜
+赣
+撕
+墩
+酬
+馁
+豌
+颐
+抗
+酣
+氓
+佑
+搁
+哭
+递
+耷
+涡
+桃
+贻
+碣
+截
+瘦
+昭
+镌
+蔓
+氚
+甲
+猕
+蕴
+蓬
+散
+拾
+纛
+狼
+猷
+铎
+埋
+旖
+矾
+讳
+囊
+糜
+迈
+粟
+蚂
+紧
+鲳
+瘢
+栽
+稼
+羊
+锄
+斟
+睁
+桥
+瓮
+蹙
+祉
+醺
+鼻
+昱
+剃
+跳
+篱
+跷
+蒜
+翎
+宅
+晖
+嗑
+壑
+峻
+癫
+屏
+狠
+陋
+袜
+途
+憎
+祀
+莹
+滟
+佶
+溥
+臣
+约
+盛
+峰
+磁
+慵
+婪
+拦
+莅
+朕
+鹦
+粲
+裤
+哎
+疡
+嫖
+琵
+窟
+堪
+谛
+嘉
+儡
+鳝
+斩
+郾
+驸
+酊
+妄
+胜
+贺
+徙
+傅
+噌
+钢
+栅
+庇
+恋
+匝
+巯
+邈
+尸
+锚
+粗
+佟
+蛟
+薹
+纵
+蚊
+郅
+绢
+锐
+苗
+俞
+篆
+淆
+膀
+鲜
+煎
+诶
+秽
+寻
+涮
+刺
+怀
+噶
+巨
+褰
+魅
+灶
+灌
+桉
+藕
+谜
+舸
+薄
+搀
+恽
+借
+牯
+痉
+渥
+愿
+亓
+耘
+杠
+柩
+锔
+蚶
+钣
+珈
+喘
+蹒
+幽
+赐
+稗
+晤
+莱
+泔
+扯
+肯
+菪
+裆
+腩
+豉
+疆
+骜
+腐
+倭
+珏
+唔
+粮
+亡
+润
+慰
+伽
+橄
+玄
+誉
+醐
+胆
+龊
+粼
+塬
+陇
+彼
+削
+嗣
+绾
+芽
+妗
+垭
+瘴
+爽
+薏
+寨
+龈
+泠
+弹
+赢
+漪
+猫
+嘧
+涂
+恤
+圭
+茧
+烽
+屑
+痕
+巾
+赖
+荸
+凰
+腮
+畈
+亵
+蹲
+偃
+苇
+澜
+艮
+换
+骺
+烘
+苕
+梓
+颉
+肇
+哗
+悄
+氤
+涠
+葬
+屠
+鹭
+植
+竺
+佯
+诣
+鲇
+瘀
+鲅
+邦
+移
+滁
+冯
+耕
+癔
+戌
+茬
+沁
+巩
+悠
+湘
+洪
+痹
+锟
+循
+谋
+腕
+鳃
+钠
+捞
+焉
+迎
+碱
+伫
+急
+榷
+奈
+邝
+卯
+辄
+皲
+卟
+醛
+畹
+忧
+稳
+雄
+昼
+缩
+阈
+睑
+扌
+耗
+曦
+涅
+捏
+瞧
+邕
+淖
+漉
+铝
+耦
+禹
+湛
+喽
+莼
+琅
+诸
+苎
+纂
+硅
+始
+嗨
+傥
+燃
+臂
+赅
+嘈
+呆
+贵
+屹
+壮
+肋
+亍
+蚀
+卅
+豹
+腆
+邬
+迭
+浊
+}
+童
+螂
+捐
+圩
+勐
+触
+寞
+汊
+壤
+荫
+膺
+渌
+芳
+懿
+遴
+螈
+泰
+蓼
+蛤
+茜
+舅
+枫
+朔
+膝
+眙
+避
+梅
+判
+鹜
+璜
+牍
+缅
+垫
+藻
+黔
+侥
+惚
+懂
+踩
+腰
+腈
+札
+丞
+唾
+慈
+顿
+摹
+荻
+琬
+~
+斧
+沈
+滂
+胁
+胀
+幄
+莜
+Z
+匀
+鄄
+掌
+绰
+茎
+焚
+赋
+萱
+谑
+汁
+铒
+瞎
+夺
+蜗
+野
+娆
+冀
+弯
+篁
+懵
+灞
+隽
+芡
+脘
+俐
+辩
+芯
+掺
+喏
+膈
+蝈
+觐
+悚
+踹
+蔗
+熠
+鼠
+呵
+抓
+橼
+峨
+畜
+缔
+禾
+崭
+弃
+熊
+摒
+凸
+拗
+穹
+蒙
+抒
+祛
+劝
+闫
+扳
+阵
+醌
+踪
+喵
+侣
+搬
+仅
+荧
+赎
+蝾
+琦
+买
+婧
+瞄
+寓
+皎
+冻
+赝
+箩
+莫
+瞰
+郊
+笫
+姝
+筒
+枪
+遣
+煸
+袋
+舆
+痱
+涛
+母
+〇
+启
+践
+耙
+绲
+盘
+遂
+昊
+搞
+槿
+诬
+纰
+泓
+惨
+檬
+亻
+越
+C
+o
+憩
+熵
+祷
+钒
+暧
+塔
+阗
+胰
+咄
+娶
+魔
+琶
+钞
+邻
+扬
+杉
+殴
+咽
+弓
+〆
+髻
+】
+吭
+揽
+霆
+拄
+殖
+脆
+彻
+岩
+芝
+勃
+辣
+剌
+钝
+嘎
+甄
+佘
+皖
+伦
+授
+徕
+憔
+挪
+皇
+庞
+稔
+芜
+踏
+溴
+兖
+卒
+擢
+饥
+鳞
+煲
+‰
+账
+颗
+叻
+斯
+捧
+鳍
+琮
+讹
+蛙
+纽
+谭
+酸
+兔
+莒
+睇
+伟
+觑
+羲
+嗜
+宜
+褐
+旎
+辛
+卦
+诘
+筋
+鎏
+溪
+挛
+熔
+阜
+晰
+鳅
+丢
+奚
+灸
+呱
+献
+陉
+黛
+鸪
+甾
+萨
+疮
+拯
+洲
+疹
+辑
+叙
+恻
+谒
+允
+柔
+烂
+氏
+逅
+漆
+拎
+惋
+扈
+湟
+纭
+啕
+掬
+擞
+哥
+忽
+涤
+鸵
+靡
+郗
+瓷
+扁
+廊
+怨
+雏
+钮
+敦
+E
+懦
+憋
+汀
+拚
+啉
+腌
+岸
+f
+痼
+瞅
+尊
+咀
+眩
+飙
+忌
+仝
+迦
+熬
+毫
+胯
+篑
+茄
+腺
+凄
+舛
+碴
+锵
+诧
+羯
+後
+漏
+汤
+宓
+仞
+蚁
+壶
+谰
+皑
+铄
+棰
+罔
+辅
+晶
+苦
+牟
+闽
+\
+烃
+饮
+聿
+丙
+蛳
+朱
+煤
+涔
+鳖
+犁
+罐
+荼
+砒
+淦
+妤
+黏
+戎
+孑
+婕
+瑾
+戢
+钵
+枣
+捋
+砥
+衩
+狙
+桠
+稣
+阎
+肃
+梏
+诫
+孪
+昶
+婊
+衫
+嗔
+侃
+塞
+蜃
+樵
+峒
+貌
+屿
+欺
+缫
+阐
+栖
+诟
+珞
+荭
+吝
+萍
+嗽
+恂
+啻
+蜴
+磬
+峋
+俸
+豫
+谎
+徊
+镍
+韬
+魇
+晴
+U
+囟
+猜
+蛮
+坐
+囿
+伴
+亭
+肝
+佗
+蝠
+妃
+胞
+滩
+榴
+氖
+垩
+苋
+砣
+扪
+馏
+姓
+轩
+厉
+夥
+侈
+禀
+垒
+岑
+赏
+钛
+辐
+痔
+披
+纸
+碳
+“
+坞
+蠓
+挤
+荥
+沅
+悔
+铧
+帼
+蒌
+蝇
+a
+p
+y
+n
+g
+哀
+浆
+瑶
+凿
+桶
+馈
+皮
+奴
+苜
+佤
+伶
+晗
+铱
+炬
+优
+弊
+氢
+恃
+甫
+攥
+端
+锌
+灰
+稹
+炝
+曙
+邋
+亥
+眶
+碾
+拉
+萝
+绔
+捷
+浍
+腋
+姑
+菖
+凌
+涞
+麽
+锢
+桨
+潢
+绎
+镰
+殆
+锑
+渝
+铬
+困
+绽
+觎
+匈
+糙
+暑
+裹
+鸟
+盔
+肽
+迷
+綦
+『
+亳
+佝
+俘
+钴
+觇
+骥
+仆
+疝
+跪
+婶
+郯
+瀹
+唉
+脖
+踞
+针
+晾
+忒
+扼
+瞩
+叛
+椒
+疟
+嗡
+邗
+肆
+跆
+玫
+忡
+捣
+咧
+唆
+艄
+蘑
+潦
+笛
+阚
+沸
+泻
+掊
+菽
+贫
+斥
+髂
+孢
+镂
+赂
+麝
+鸾
+屡
+衬
+苷
+恪
+叠
+希
+粤
+爻
+喝
+茫
+惬
+郸
+绻
+庸
+撅
+碟
+宄
+妹
+膛
+叮
+饵
+崛
+嗲
+椅
+冤
+搅
+咕
+敛
+尹
+垦
+闷
+蝉
+霎
+勰
+败
+蓑
+泸
+肤
+鹌
+幌
+焦
+浠
+鞍
+刁
+舰
+乙
+竿
+裔
+。
+茵
+函
+伊
+兄
+丨
+娜
+匍
+謇
+莪
+宥
+似
+蝽
+翳
+酪
+翠
+粑
+薇
+祢
+骏
+赠
+叫
+Q
+噤
+噻
+竖
+芗
+莠
+潭
+俊
+羿
+耜
+O
+郫
+趁
+嗪
+囚
+蹶
+芒
+洁
+笋
+鹑
+敲
+硝
+啶
+堡
+渲
+揩
+』
+携
+宿
+遒
+颍
+扭
+棱
+割
+萜
+蔸
+葵
+琴
+捂
+饰
+衙
+耿
+掠
+募
+岂
+窖
+涟
+蔺
+瘤
+柞
+瞪
+怜
+匹
+距
+楔
+炜
+哆
+秦
+缎
+幼
+茁
+绪
+痨
+恨
+楸
+娅
+瓦
+桩
+雪
+嬴
+伏
+榔
+妥
+铿
+拌
+眠
+雍
+缇
+‘
+卓
+搓
+哌
+觞
+噩
+屈
+哧
+髓
+咦
+巅
+娑
+侑
+淫
+膳
+祝
+勾
+姊
+莴
+胄
+疃
+薛
+蜷
+胛
+巷
+芙
+芋
+熙
+闰
+勿
+窃
+狱
+剩
+钏
+幢
+陟
+铛
+慧
+靴
+耍
+k
+浙
+浇
+飨
+惟
+绗
+祜
+澈
+啼
+咪
+磷
+摞
+诅
+郦
+抹
+跃
+壬
+吕
+肖
+琏
+颤
+尴
+剡
+抠
+凋
+赚
+泊
+津
+宕
+殷
+倔
+氲
+漫
+邺
+涎
+怠
+$
+垮
+荬
+遵
+俏
+叹
+噢
+饽
+蜘
+孙
+筵
+疼
+鞭
+羧
+牦
+箭
+潴
+c
+眸
+祭
+髯
+啖
+坳
+愁
+芩
+驮
+倡
+巽
+穰
+沃
+胚
+怒
+凤
+槛
+剂
+趵
+嫁
+v
+邢
+灯
+鄢
+桐
+睽
+檗
+锯
+槟
+婷
+嵋
+圻
+诗
+蕈
+颠
+遭
+痢
+芸
+怯
+馥
+竭
+锗
+徜
+恭
+遍
+籁
+剑
+嘱
+苡
+龄
+僧
+桑
+潸
+弘
+澶
+楹
+悲
+讫
+愤
+腥
+悸
+谍
+椹
+呢
+桓
+葭
+攫
+阀
+翰
+躲
+敖
+柑
+郎
+笨
+橇
+呃
+魁
+燎
+脓
+葩
+磋
+垛
+玺
+狮
+沓
+砜
+蕊
+锺
+罹
+蕉
+翱
+虐
+闾
+巫
+旦
+茱
+嬷
+枯
+鹏
+贡
+芹
+汛
+矫
+绁
+拣
+禺
+佃
+讣
+舫
+惯
+乳
+趋
+疲
+挽
+岚
+虾
+衾
+蠹
+蹂
+飓
+氦
+铖
+孩
+稞
+瑜
+壅
+掀
+勘
+妓
+畅
+髋
+W
+庐
+牲
+蓿
+榕
+练
+垣
+唱
+邸
+菲
+昆
+婺
+穿
+绡
+麒
+蚱
+掂
+愚
+泷
+涪
+漳
+妩
+娉
+榄
+讷
+觅
+旧
+藤
+煮
+呛
+柳
+腓
+叭
+庵
+烷
+阡
+罂
+蜕
+擂
+猖
+咿
+媲
+脉
+【
+沏
+貅
+黠
+熏
+哲
+烁
+坦
+酵
+兜
+×
+潇
+撒
+剽
+珩
+圹
+乾
+摸
+樟
+帽
+嗒
+襄
+魂
+轿
+憬
+锡
+〕
+喃
+皆
+咖
+隅
+脸
+残
+泮
+袂
+鹂
+珊
+囤
+捆
+咤
+误
+徨
+闹
+淙
+芊
+淋
+怆
+囗
+拨
+梳
+渤
+R
+G
+绨
+蚓
+婀
+幡
+狩
+麾
+谢
+唢
+裸
+旌
+伉
+纶
+裂
+驳
+砼
+咛
+澄
+樨
+蹈
+宙
+澍
+倍
+貔
+操
+勇
+蟠
+摈
+砧
+虬
+够
+缁
+悦
+藿
+撸
+艹
+摁
+淹
+豇
+虎
+榭
+ˉ
+吱
+d
+°
+喧
+荀
+踱
+侮
+奋
+偕
+饷
+犍
+惮
+坑
+璎
+徘
+宛
+妆
+袈
+倩
+窦
+昂
+荏
+乖
+K
+怅
+撰
+鳙
+牙
+袁
+酞
+X
+痿
+琼
+闸
+雁
+趾
+荚
+虻
+涝
+《
+杏
+韭
+偈
+烤
+绫
+鞘
+卉
+症
+遢
+蓥
+诋
+杭
+荨
+匆
+竣
+簪
+辙
+敕
+虞
+丹
+缭
+咩
+黟
+m
+淤
+瑕
+咂
+铉
+硼
+茨
+嶂
+痒
+畸
+敬
+涿
+粪
+窘
+熟
+叔
+嫔
+盾
+忱
+裘
+憾
+梵
+赡
+珙
+咯
+娘
+庙
+溯
+胺
+葱
+痪
+摊
+荷
+卞
+乒
+髦
+寐
+铭
+坩
+胗
+枷
+爆
+溟
+嚼
+羚
+砬
+轨
+惊
+挠
+罄
+竽
+菏
+氧
+浅
+楣
+盼
+枢
+炸
+阆
+杯
+谏
+噬
+淇
+渺
+俪
+秆
+墓
+泪
+跻
+砌
+痰
+垡
+渡
+耽
+釜
+讶
+鳎
+煞
+呗
+韶
+舶
+绷
+鹳
+缜
+旷
+铊
+皱
+龌
+檀
+霖
+奄
+槐
+艳
+蝶
+旋
+哝
+赶
+骞
+蚧
+腊
+盈
+丁
+`
+蜚
+矸
+蝙
+睨
+嚓
+僻
+鬼
+醴
+夜
+彝
+磊
+笔
+拔
+栀
+糕
+厦
+邰
+纫
+逭
+纤
+眦
+膊
+馍
+躇
+烯
+蘼
+冬
+诤
+暄
+骶
+哑
+瘠
+」
+臊
+丕
+愈
+咱
+螺
+擅
+跋
+搏
+硪
+谄
+笠
+淡
+嘿
+骅
+谧
+鼎
+皋
+姚
+歼
+蠢
+驼
+耳
+胬
+挝
+涯
+狗
+蒽
+孓
+犷
+凉
+芦
+箴
+铤
+孤
+嘛
+坤
+V
+茴
+朦
+挞
+尖
+橙
+诞
+搴
+碇
+洵
+浚
+帚
+蜍
+漯
+柘
+嚎
+讽
+芭
+荤
+咻
+祠
+秉
+跖
+埃
+吓
+糯
+眷
+馒
+惹
+娼
+鲑
+嫩
+讴
+轮
+瞥
+靶
+褚
+乏
+缤
+宋
+帧
+删
+驱
+碎
+扑
+俩
+俄
+偏
+涣
+竹
+噱
+皙
+佰
+渚
+唧
+斡
+#
+镉
+刀
+崎
+筐
+佣
+夭
+贰
+肴
+峙
+哔
+艿
+匐
+牺
+镛
+缘
+仡
+嫡
+劣
+枸
+堀
+梨
+簿
+鸭
+蒸
+亦
+稽
+浴
+{
+衢
+束
+槲
+j
+阁
+揍
+疥
+棋
+潋
+聪
+窜
+乓
+睛
+插
+冉
+阪
+苍
+搽
+「
+蟾
+螟
+幸
+仇
+樽
+撂
+慢
+跤
+幔
+俚
+淅
+覃
+觊
+溶
+妖
+帛
+侨
+曰
+妾
+泗
+·
+:
+瀘
+風
+Ë
+(
+)
+∶
+紅
+紗
+瑭
+雲
+頭
+鶏
+財
+許
+•
+¥
+樂
+焗
+麗
+—
+;
+滙
+東
+榮
+繪
+興
+…
+門
+業
+π
+楊
+國
+顧
+é
+盤
+寳
+Λ
+龍
+鳳
+島
+誌
+緣
+結
+銭
+萬
+勝
+祎
+璟
+優
+歡
+臨
+時
+購
+=
+★
+藍
+昇
+鐵
+觀
+勅
+農
+聲
+畫
+兿
+術
+發
+劉
+記
+專
+耑
+園
+書
+壴
+種
+Ο
+●
+褀
+號
+銀
+匯
+敟
+锘
+葉
+橪
+廣
+進
+蒄
+鑽
+阝
+祙
+貢
+鍋
+豊
+夬
+喆
+團
+閣
+開
+燁
+賓
+館
+酡
+沔
+順
++
+硚
+劵
+饸
+陽
+車
+湓
+復
+萊
+氣
+軒
+華
+堃
+迮
+纟
+戶
+馬
+學
+裡
+電
+嶽
+獨
+マ
+シ
+サ
+ジ
+燘
+袪
+環
+❤
+臺
+灣
+専
+賣
+孖
+聖
+攝
+線
+▪
+α
+傢
+俬
+夢
+達
+莊
+喬
+貝
+薩
+劍
+羅
+壓
+棛
+饦
+尃
+璈
+囍
+醫
+G
+I
+A
+#
+N
+鷄
+髙
+嬰
+啓
+約
+隹
+潔
+賴
+藝
+~
+寶
+籣
+麺
+
+嶺
+√
+義
+網
+峩
+長
+∧
+魚
+機
+構
+②
+鳯
+偉
+L
+B
+㙟
+畵
+鴿
+'
+詩
+溝
+嚞
+屌
+藔
+佧
+玥
+蘭
+織
+1
+3
+9
+0
+7
+點
+砭
+鴨
+鋪
+銘
+廳
+弍
+‧
+創
+湯
+坶
+℃
+卩
+骝
+&
+烜
+荘
+當
+潤
+扞
+係
+懷
+碶
+钅
+蚨
+讠
+☆
+叢
+爲
+埗
+涫
+塗
+→
+楽
+現
+鯨
+愛
+瑪
+鈺
+忄
+悶
+藥
+飾
+樓
+視
+孬
+ㆍ
+燚
+苪
+師
+①
+丼
+锽
+│
+韓
+標
+è
+兒
+閏
+匋
+張
+漢
+Ü
+髪
+會
+閑
+檔
+習
+裝
+の
+峯
+菘
+輝
+И
+雞
+釣
+億
+浐
+K
+O
+R
+8
+H
+E
+P
+T
+W
+D
+S
+C
+M
+F
+姌
+饹
+»
+晞
+廰
+ä
+嵯
+鷹
+負
+飲
+絲
+冚
+楗
+澤
+綫
+區
+❋
+←
+質
+靑
+揚
+③
+滬
+統
+産
+協
+﹑
+乸
+畐
+經
+運
+際
+洺
+岽
+為
+粵
+諾
+崋
+豐
+碁
+ɔ
+V
+2
+6
+齋
+誠
+訂
+´
+勑
+雙
+陳
+無
+í
+泩
+媄
+夌
+刂
+i
+c
+t
+o
+r
+a
+嘢
+耄
+燴
+暃
+壽
+媽
+靈
+抻
+體
+唻
+É
+冮
+甹
+鎮
+錦
+ʌ
+蜛
+蠄
+尓
+駕
+戀
+飬
+逹
+倫
+貴
+極
+Я
+Й
+寬
+磚
+嶪
+郎
+職
+|
+間
+n
+d
+剎
+伈
+課
+飛
+橋
+瘊
+№
+譜
+骓
+圗
+滘
+縣
+粿
+咅
+養
+濤
+彳
+®
+%
+Ⅱ
+啰
+㴪
+見
+矞
+薬
+糁
+邨
+鲮
+顔
+罱
+З
+選
+話
+贏
+氪
+俵
+競
+瑩
+繡
+枱
+β
+綉
+á
+獅
+爾
+™
+麵
+戋
+淩
+徳
+個
+劇
+場
+務
+簡
+寵
+h
+實
+膠
+轱
+圖
+築
+嘣
+樹
+㸃
+營
+耵
+孫
+饃
+鄺
+飯
+麯
+遠
+輸
+坫
+孃
+乚
+閃
+鏢
+㎡
+題
+廠
+關
+↑
+爺
+將
+軍
+連
+篦
+覌
+參
+箸
+-
+窠
+棽
+寕
+夀
+爰
+歐
+呙
+閥
+頡
+熱
+雎
+垟
+裟
+凬
+勁
+帑
+馕
+夆
+疌
+枼
+馮
+貨
+蒤
+樸
+彧
+旸
+靜
+龢
+暢
+㐱
+鳥
+珺
+鏡
+灡
+爭
+堷
+廚
+Ó
+騰
+診
+┅
+蘇
+褔
+凱
+頂
+豕
+亞
+帥
+嘬
+⊥
+仺
+桖
+複
+饣
+絡
+穂
+顏
+棟
+納
+▏
+濟
+親
+設
+計
+攵
+埌
+烺
+ò
+頤
+燦
+蓮
+撻
+節
+講
+濱
+濃
+娽
+洳
+朿
+燈
+鈴
+護
+膚
+铔
+過
+補
+Z
+U
+5
+4
+坋
+闿
+䖝
+餘
+缐
+铞
+貿
+铪
+桼
+趙
+鍊
+[
+㐂
+垚
+菓
+揸
+捲
+鐘
+滏
+𣇉
+爍
+輪
+燜
+鴻
+鮮
+動
+鹞
+鷗
+丄
+慶
+鉌
+翥
+飮
+腸
+⇋
+漁
+覺
+來
+熘
+昴
+翏
+鲱
+圧
+鄉
+萭
+頔
+爐
+嫚
+г
+貭
+類
+聯
+幛
+輕
+訓
+鑒
+夋
+锨
+芃
+珣
+䝉
+扙
+嵐
+銷
+處
+ㄱ
+語
+誘
+苝
+歸
+儀
+燒
+楿
+內
+粢
+葒
+奧
+麥
+礻
+滿
+蠔
+穵
+瞭
+態
+鱬
+榞
+硂
+鄭
+黃
+煙
+祐
+奓
+逺
+*
+瑄
+獲
+聞
+薦
+讀
+這
+樣
+決
+問
+啟
+們
+執
+説
+轉
+單
+隨
+唘
+帶
+倉
+庫
+還
+贈
+尙
+皺
+■
+餅
+產
+○
+∈
+報
+狀
+楓
+賠
+琯
+嗮
+禮
+`
+傳
+>
+≤
+嗞
+Φ
+≥
+換
+咭
+∣
+↓
+曬
+ε
+応
+寫
+″
+終
+様
+純
+費
+療
+聨
+凍
+壐
+郵
+ü
+黒
+∫
+製
+塊
+調
+軽
+確
+撃
+級
+馴
+Ⅲ
+涇
+繹
+數
+碼
+證
+狒
+処
+劑
+<
+晧
+賀
+衆
+]
+櫥
+兩
+陰
+絶
+對
+鯉
+憶
+◎
+p
+e
+Y
+蕒
+煖
+頓
+測
+試
+鼽
+僑
+碩
+妝
+帯
+≈
+鐡
+舖
+權
+喫
+倆
+ˋ
+該
+悅
+ā
+俫
+.
+f
+s
+b
+m
+k
+g
+u
+j
+貼
+淨
+濕
+針
+適
+備
+l
+/
+給
+謢
+強
+觸
+衛
+與
+⊙
+$
+緯
+變
+⑴
+⑵
+⑶
+㎏
+殺
+∩
+幚
+─
+價
+▲
+離
+ú
+ó
+飄
+烏
+関
+閟
+﹝
+﹞
+邏
+輯
+鍵
+驗
+訣
+導
+歷
+屆
+層
+▼
+儱
+錄
+熳
+ē
+艦
+吋
+錶
+辧
+飼
+顯
+④
+禦
+販
+気
+対
+枰
+閩
+紀
+幹
+瞓
+貊
+淚
+△
+眞
+墊
+Ω
+獻
+褲
+縫
+緑
+亜
+鉅
+餠
+{
+}
+◆
+蘆
+薈
+█
+◇
+溫
+彈
+晳
+粧
+犸
+穩
+訊
+崬
+凖
+熥
+П
+舊
+條
+紋
+圍
+Ⅳ
+筆
+尷
+難
+雜
+錯
+綁
+識
+頰
+鎖
+艶
+□
+殁
+殼
+⑧
+├
+▕
+鵬
+ǐ
+ō
+ǒ
+糝
+綱
+▎
+μ
+盜
+饅
+醬
+籤
+蓋
+釀
+鹽
+據
+à
+ɡ
+辦
+◥
+彐
+┌
+婦
+獸
+鲩
+伱
+ī
+蒟
+蒻
+齊
+袆
+腦
+寧
+凈
+妳
+煥
+詢
+偽
+謹
+啫
+鯽
+騷
+鱸
+損
+傷
+鎻
+髮
+買
+冏
+儥
+両
+﹢
+∞
+載
+喰
+z
+羙
+悵
+燙
+曉
+員
+組
+徹
+艷
+痠
+鋼
+鼙
+縮
+細
+嚒
+爯
+≠
+維
+"
+鱻
+壇
+厍
+帰
+浥
+犇
+薡
+軎
+²
+應
+醜
+刪
+緻
+鶴
+賜
+噁
+軌
+尨
+镔
+鷺
+槗
+彌
+葚
+濛
+請
+溇
+緹
+賢
+訪
+獴
+瑅
+資
+縤
+陣
+蕟
+栢
+韻
+祼
+恁
+伢
+謝
+劃
+涑
+總
+衖
+踺
+砋
+凉
+籃
+駿
+苼
+瘋
+昽
+紡
+驊
+腎
+﹗
+響
+杋
+剛
+嚴
+禪
+歓
+槍
+傘
+檸
+檫
+炣
+勢
+鏜
+鎢
+銑
+尐
+減
+奪
+惡
+θ
+僮
+婭
+臘
+ū
+ì
+殻
+鉄
+∑
+蛲
+焼
+緖
+續
+紹
+懮
diff --git a/docling_ibm_models/slanet_1m/dict_table/table_dict.txt b/docling_ibm_models/slanet_1m/dict_table/table_dict.txt
new file mode 100644
index 0000000..2ef028c
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/dict_table/table_dict.txt
@@ -0,0 +1,277 @@
+←
+
+☆
+─
+α
+
+
+⋅
+$
+ω
+ψ
+χ
+(
+υ
+≥
+σ
+,
+ρ
+ε
+0
+■
+4
+8
+✗
+b
+<
+✓
+Ψ
+Ω
+€
+D
+3
+Π
+H
+║
+
+L
+Φ
+Χ
+θ
+P
+κ
+λ
+μ
+T
+ξ
+X
+β
+γ
+δ
+\
+ζ
+η
+`
+d
+
+h
+f
+l
+Θ
+p
+√
+t
+
+x
+Β
+Γ
+Δ
+|
+ǂ
+ɛ
+j
+̧
+➢
+
+̌
+′
+«
+△
+▲
+#
+
+'
+Ι
++
+¶
+/
+▼
+⇑
+□
+·
+7
+▪
+;
+?
+➔
+∩
+C
+÷
+G
+⇒
+K
+
+O
+S
+С
+W
+Α
+[
+○
+_
+●
+‡
+c
+z
+g
+
+o
+
+〈
+〉
+s
+⩽
+w
+φ
+ʹ
+{
+»
+∣
+̆
+e
+ˆ
+∈
+τ
+◆
+ι
+∅
+∆
+∙
+∘
+Ø
+ß
+✔
+∞
+∑
+−
+×
+◊
+∗
+∖
+˃
+˂
+∫
+"
+i
+&
+π
+↔
+*
+∥
+æ
+∧
+.
+⁄
+ø
+Q
+∼
+6
+⁎
+:
+★
+>
+a
+B
+≈
+F
+J
+̄
+N
+♯
+R
+V
+
+―
+Z
+♣
+^
+¤
+¥
+§
+
+¢
+£
+≦
+
+≤
+‖
+Λ
+©
+n
+↓
+→
+↑
+r
+°
+±
+v
+
+♂
+k
+♀
+~
+ᅟ
+̇
+@
+”
+♦
+ł
+®
+⊕
+„
+!
+
+%
+⇓
+)
+-
+1
+5
+9
+=
+А
+A
+‰
+⋆
+Σ
+E
+◦
+I
+※
+M
+m
+̨
+⩾
+†
+
+•
+U
+Y
+
+]
+̸
+2
+‐
+–
+‒
+̂
+—
+̀
+́
+’
+‘
+⋮
+⋯
+̊
+“
+̈
+≧
+q
+u
+ı
+y
+
+
+̃
+}
+ν
diff --git a/docling_ibm_models/slanet_1m/dict_table/table_structure_dict.txt b/docling_ibm_models/slanet_1m/dict_table/table_structure_dict.txt
new file mode 100644
index 0000000..fec6f7d
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/dict_table/table_structure_dict.txt
@@ -0,0 +1,28 @@
+
+
+
+ |
+
+
+
+
+
+ colspan="2"
+ colspan="3"
+ rowspan="2"
+ colspan="4"
+ colspan="6"
+ rowspan="3"
+ colspan="9"
+ colspan="10"
+ colspan="7"
+ rowspan="4"
+ rowspan="5"
+ rowspan="9"
+ colspan="8"
+ rowspan="8"
+ rowspan="6"
+ rowspan="7"
+ rowspan="10"
diff --git a/docling_ibm_models/slanet_1m/export_model.py b/docling_ibm_models/slanet_1m/export_model.py
new file mode 100644
index 0000000..7e88ee6
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/export_model.py
@@ -0,0 +1,295 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "..")))
+
+import argparse
+
+import paddle
+from paddle.jit import to_static
+
+from modeling.architectures import build_model
+from paddleocr.ppocr.postprocess import build_post_process
+from paddleocr.ppocr.utils.save_load import load_model
+from paddleocr.ppocr.utils.logging import get_logger
+from program import load_config, merge_config, ArgsParser
+
+
+def export_single_model(
+ model, arch_config, save_path, logger, input_shape=None, quanter=None
+):
+ if arch_config["algorithm"] == "SRN":
+ max_text_length = arch_config["Head"]["max_text_length"]
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 1, 64, 256], dtype="float32"),
+ [
+ paddle.static.InputSpec(shape=[None, 256, 1], dtype="int64"),
+ paddle.static.InputSpec(
+ shape=[None, max_text_length, 1], dtype="int64"
+ ),
+ paddle.static.InputSpec(
+ shape=[None, 8, max_text_length, max_text_length], dtype="int64"
+ ),
+ paddle.static.InputSpec(
+ shape=[None, 8, max_text_length, max_text_length], dtype="int64"
+ ),
+ ],
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] == "SAR":
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 3, 48, 160], dtype="float32"),
+ [paddle.static.InputSpec(shape=[None], dtype="float32")],
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] in ["SVTR_LCNet", "SVTR_HGNet"]:
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 3, 48, -1], dtype="float32"),
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] in ["SVTR", "CPPD"]:
+ other_shape = [
+ paddle.static.InputSpec(shape=[None] + input_shape, dtype="float32"),
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] == "PREN":
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 3, 64, 256], dtype="float32"),
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["model_type"] == "sr":
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 3, 16, 64], dtype="float32")
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] == "ViTSTR":
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 1, 224, 224], dtype="float32"),
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] == "ABINet":
+ if not input_shape:
+ input_shape = [3, 32, 128]
+ other_shape = [
+ paddle.static.InputSpec(shape=[None] + input_shape, dtype="float32"),
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] in ["NRTR", "SPIN", "RFL"]:
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 1, 32, 100], dtype="float32"),
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] in ["SATRN"]:
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 3, 32, 100], dtype="float32"),
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] == "VisionLAN":
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 3, 64, 256], dtype="float32"),
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] == "RobustScanner":
+ max_text_length = arch_config["Head"]["max_text_length"]
+ other_shape = [
+ paddle.static.InputSpec(shape=[None, 3, 48, 160], dtype="float32"),
+ [
+ paddle.static.InputSpec(
+ shape=[
+ None,
+ ],
+ dtype="float32",
+ ),
+ paddle.static.InputSpec(shape=[None, max_text_length], dtype="int64"),
+ ],
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] == "CAN":
+ other_shape = [
+ [
+ paddle.static.InputSpec(shape=[None, 1, None, None], dtype="float32"),
+ paddle.static.InputSpec(shape=[None, 1, None, None], dtype="float32"),
+ paddle.static.InputSpec(
+ shape=[None, arch_config["Head"]["max_text_length"]], dtype="int64"
+ ),
+ ]
+ ]
+ model = to_static(model, input_spec=other_shape)
+ elif arch_config["algorithm"] in ["LayoutLM", "LayoutLMv2", "LayoutXLM"]:
+ input_spec = [
+ paddle.static.InputSpec(shape=[None, 512], dtype="int64"), # input_ids
+ paddle.static.InputSpec(shape=[None, 512, 4], dtype="int64"), # bbox
+ paddle.static.InputSpec(shape=[None, 512], dtype="int64"), # attention_mask
+ paddle.static.InputSpec(shape=[None, 512], dtype="int64"), # token_type_ids
+ paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype="int64"), # image
+ ]
+ if "Re" in arch_config["Backbone"]["name"]:
+ input_spec.extend(
+ [
+ paddle.static.InputSpec(
+ shape=[None, 512, 3], dtype="int64"
+ ), # entities
+ paddle.static.InputSpec(
+ shape=[None, None, 2], dtype="int64"
+ ), # relations
+ ]
+ )
+ if model.backbone.use_visual_backbone is False:
+ input_spec.pop(4)
+ model = to_static(model, input_spec=[input_spec])
+ else:
+ infer_shape = [3, -1, -1]
+ if arch_config["model_type"] == "rec":
+ infer_shape = [3, 32, -1] # for rec model, H must be 32
+ if (
+ "Transform" in arch_config
+ and arch_config["Transform"] is not None
+ and arch_config["Transform"]["name"] == "TPS"
+ ):
+ logger.info(
+ "When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training"
+ )
+ infer_shape[-1] = 100
+ elif arch_config["model_type"] == "table":
+ infer_shape = [3, 488, 488]
+ if arch_config["algorithm"] == "TableMaster":
+ infer_shape = [3, 480, 480]
+ if arch_config["algorithm"] == "SLANet":
+ infer_shape = [3, -1, -1]
+ model = to_static(
+ model,
+ input_spec=[
+ paddle.static.InputSpec(shape=[None] + infer_shape, dtype="float32")
+ ],
+ )
+
+ if (
+ arch_config["model_type"] != "sr"
+ and arch_config["Backbone"]["name"] == "PPLCNetV3"
+ ):
+ # for rep lcnetv3
+ for layer in model.sublayers():
+ if hasattr(layer, "rep") and not getattr(layer, "is_repped"):
+ layer.rep()
+
+ if quanter is None:
+ paddle.jit.save(model, save_path)
+ else:
+ quanter.save_quantized_model(model, save_path)
+ logger.info("inference model is saved to {}".format(save_path))
+ return
+
+
+def main():
+ FLAGS = ArgsParser().parse_args()
+ config = load_config(FLAGS.config)
+ config = merge_config(config, FLAGS.opt)
+ logger = get_logger()
+ # build post process
+
+ post_process_class = build_post_process(config["PostProcess"], config["Global"])
+
+ # build model
+ # for rec algorithm
+ if hasattr(post_process_class, "character"):
+ char_num = len(getattr(post_process_class, "character"))
+ if config["Architecture"]["algorithm"] in [
+ "Distillation",
+ ]: # distillation model
+ for key in config["Architecture"]["Models"]:
+ if (
+ config["Architecture"]["Models"][key]["Head"]["name"] == "MultiHead"
+ ): # multi head
+ out_channels_list = {}
+ if config["PostProcess"]["name"] == "DistillationSARLabelDecode":
+ char_num = char_num - 2
+ if config["PostProcess"]["name"] == "DistillationNRTRLabelDecode":
+ char_num = char_num - 3
+ out_channels_list["CTCLabelDecode"] = char_num
+ out_channels_list["SARLabelDecode"] = char_num + 2
+ out_channels_list["NRTRLabelDecode"] = char_num + 3
+ config["Architecture"]["Models"][key]["Head"][
+ "out_channels_list"
+ ] = out_channels_list
+ else:
+ config["Architecture"]["Models"][key]["Head"][
+ "out_channels"
+ ] = char_num
+                # just one final tensor needs to be exported for inference
+ config["Architecture"]["Models"][key]["return_all_feats"] = False
+ elif config["Architecture"]["Head"]["name"] == "MultiHead": # multi head
+ out_channels_list = {}
+ char_num = len(getattr(post_process_class, "character"))
+ if config["PostProcess"]["name"] == "SARLabelDecode":
+ char_num = char_num - 2
+ if config["PostProcess"]["name"] == "NRTRLabelDecode":
+ char_num = char_num - 3
+ out_channels_list["CTCLabelDecode"] = char_num
+ out_channels_list["SARLabelDecode"] = char_num + 2
+ out_channels_list["NRTRLabelDecode"] = char_num + 3
+ config["Architecture"]["Head"]["out_channels_list"] = out_channels_list
+ else: # base rec model
+ config["Architecture"]["Head"]["out_channels"] = char_num
+
+ # for sr algorithm
+ if config["Architecture"]["model_type"] == "sr":
+ config["Architecture"]["Transform"]["infer_mode"] = True
+ model = build_model(config["Architecture"])
+ load_model(config, model, model_type=config["Architecture"]["model_type"])
+ model.eval()
+
+ save_path = config["Global"]["save_inference_dir"]
+
+ arch_config = config["Architecture"]
+
+ if (
+ arch_config["algorithm"] in ["SVTR", "CPPD"]
+ and arch_config["Head"]["name"] != "MultiHead"
+ ):
+ input_shape = config["Eval"]["dataset"]["transforms"][-2]["SVTRRecResizeImg"][
+ "image_shape"
+ ]
+ elif arch_config["algorithm"].lower() == "ABINet".lower():
+ rec_rs = [
+ c
+ for c in config["Eval"]["dataset"]["transforms"]
+ if "ABINetRecResizeImg" in c
+ ]
+ input_shape = rec_rs[0]["ABINetRecResizeImg"]["image_shape"] if rec_rs else None
+ else:
+ input_shape = None
+
+ if arch_config["algorithm"] in [
+ "Distillation",
+ ]: # distillation model
+ archs = list(arch_config["Models"].values())
+ for idx, name in enumerate(model.model_name_list):
+ sub_model_save_path = os.path.join(save_path, name, "inference")
+ export_single_model(
+ model.model_list[idx], archs[idx], sub_model_save_path, logger
+ )
+ else:
+ save_path = os.path.join(save_path, "inference")
+ export_single_model(
+ model, arch_config, save_path, logger, input_shape=input_shape
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdiparams b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdiparams
new file mode 100644
index 0000000..2efedca
Binary files /dev/null and b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdiparams differ
diff --git a/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdiparams.info b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdiparams.info
new file mode 100644
index 0000000..622d87b
Binary files /dev/null and b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdiparams.info differ
diff --git a/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdmodel b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdmodel
new file mode 100644
index 0000000..0a6bf1e
Binary files /dev/null and b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdmodel differ
diff --git a/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdiparams b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdiparams
new file mode 100644
index 0000000..26ba0c9
Binary files /dev/null and b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdiparams differ
diff --git a/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdiparams.info b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000..1cdccfc
Binary files /dev/null and b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdiparams.info differ
diff --git a/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdmodel b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdmodel
new file mode 100644
index 0000000..5dfe4cf
Binary files /dev/null and b/docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdmodel differ
diff --git a/docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams b/docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams
new file mode 100644
index 0000000..3a12bbe
Binary files /dev/null and b/docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams differ
diff --git a/docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams.info b/docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams.info
new file mode 100644
index 0000000..55c45ce
Binary files /dev/null and b/docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams.info differ
diff --git a/docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdmodel b/docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdmodel
new file mode 100644
index 0000000..c522762
Binary files /dev/null and b/docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdmodel differ
diff --git a/docling_ibm_models/slanet_1m/kubernetes/train-job.yaml b/docling_ibm_models/slanet_1m/kubernetes/train-job.yaml
new file mode 100644
index 0000000..b1af14c
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/kubernetes/train-job.yaml
@@ -0,0 +1,88 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: train-job-${CI_PIPELINE_ID}
+ namespace: $NAMESPACE
+spec:
+ template:
+ spec:
+ containers:
+ - name: train-container
+ image: python:3.11
+ command: [ "bash", "-c" ]
+ args:
+ - |
+ # Install MinIO client
+ curl https://dl.min.io/client/mc/release/linux-amd64/mc --create-dirs -o ~/minio-binaries/mc
+ chmod +x $HOME/minio-binaries/mc
+ export PATH=$PATH:$HOME/minio-binaries/
+
+ # Set alias for MinIO server
+ mc alias set minio $ENDPOINT_URL $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY
+
+ # Clone the repository
+ git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@${CI_SERVER_HOST}/${CI_PROJECT_PATH}.git /repo
+ cd /repo
+
+ # Checkout the branch
+ git checkout $CI_COMMIT_REF_NAME
+ echo "Checking out branch $CI_COMMIT_REF_NAME"
+
+ # Set up environment
+ echo "BUCKET=$BUCKET" >> .env
+ echo "ENDPOINT_URL=$ENDPOINT_URL" >> .env
+ echo "REGION=$REGION" >> .env
+ echo "MODELS_BUCKET=$MODELS_BUCKET" >> .env
+ echo "AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID" >> .env
+ echo "AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" >> .env
+ export $(cat .env | xargs)
+
+ # Install dependencies
+ python3.11 -m venv .venv
+ source .venv/bin/activate
+ pip install --requirement requirements.txt --no-cache-dir
+ pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
+ apt update && apt install --yes ffmpeg libsm6 libxext6
+
+ # Run training
+ dvc repro --pull
+
+ # Save the experiment metadata
+ dvc params diff main --md > experiment_report.md
+ dvc metrics diff main --md >> experiment_report.md
+
+ # Push the experiment metadata to MinIO
+ dvc push
+
+ mc cp experiment_report.md minio/$MODELS_BUCKET/experiments/experiment_report_${CI_MERGE_REQUEST_IID}.md
+ mc cp dvc.lock minio/$MODELS_BUCKET/experiments/dvc_lock_${CI_MERGE_REQUEST_IID}.lock
+ volumeMounts:
+ - name: repo-volume
+ mountPath: /repo
+ env:
+ - name: AWS_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: aws-credentials
+ key: access_key_id
+ - name: AWS_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: aws-credentials
+ key: secret_access_key
+ - name: BUCKET
+ value: $BUCKET
+ - name: ENDPOINT_URL
+ value: $ENDPOINT_URL
+ - name: REGION
+ value: $REGION
+ - name: MODELS_BUCKET
+ value: $MODELS_BUCKET
+ resources:
+ limits:
+ nvidia.com/gpu-rtx-4090-24gb: 1
+ restartPolicy: Never
+ volumes:
+ - name: repo-volume
+ emptyDir: { }
+ backoffLimit: 2
diff --git a/docling_ibm_models/slanet_1m/losses/__init__.py b/docling_ibm_models/slanet_1m/losses/__init__.py
new file mode 100644
index 0000000..0b170d6
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/losses/__init__.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import paddle
+import paddle.nn as nn
+
+
+# table loss
+from .table_att_loss import TableAttentionLoss, SLALoss
+
+
+def build_loss(config):
+ support_dict = [
+ "TableAttentionLoss",
+ "SLALoss",
+
+ ]
+ config = copy.deepcopy(config)
+ module_name = config.pop("name")
+ assert module_name in support_dict, Exception(
+ "loss only support {}".format(support_dict)
+ )
+ module_class = eval(module_name)(**config)
+ return module_class
diff --git a/docling_ibm_models/slanet_1m/losses/table_att_loss.py b/docling_ibm_models/slanet_1m/losses/table_att_loss.py
new file mode 100644
index 0000000..5f0e780
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/losses/table_att_loss.py
@@ -0,0 +1,100 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+from paddle.nn import functional as F
+
+
+class TableAttentionLoss(nn.Layer):
+ def __init__(self, structure_weight, loc_weight, **kwargs):
+ super(TableAttentionLoss, self).__init__()
+ self.loss_func = nn.CrossEntropyLoss(weight=None, reduction="none")
+ self.structure_weight = structure_weight
+ self.loc_weight = loc_weight
+
+ def forward(self, predicts, batch):
+ structure_probs = predicts["structure_probs"]
+ structure_targets = batch[1].astype("int64")
+ structure_targets = structure_targets[:, 1:]
+ structure_probs = paddle.reshape(
+ structure_probs, [-1, structure_probs.shape[-1]]
+ )
+ structure_targets = paddle.reshape(structure_targets, [-1])
+ structure_loss = self.loss_func(structure_probs, structure_targets)
+
+ structure_loss = paddle.mean(structure_loss) * self.structure_weight
+
+ loc_preds = predicts["loc_preds"]
+ loc_targets = batch[2].astype("float32")
+ loc_targets_mask = batch[3].astype("float32")
+ loc_targets = loc_targets[:, 1:, :]
+ loc_targets_mask = loc_targets_mask[:, 1:, :]
+ loc_loss = (
+ F.mse_loss(loc_preds * loc_targets_mask, loc_targets) * self.loc_weight
+ )
+
+ total_loss = structure_loss + loc_loss
+ return {
+ "loss": total_loss,
+ "structure_loss": structure_loss,
+ "loc_loss": loc_loss,
+ }
+
+
+class SLALoss(nn.Layer):
+ def __init__(self, structure_weight, loc_weight, loc_loss="mse", **kwargs):
+ super(SLALoss, self).__init__()
+ self.loss_func = nn.CrossEntropyLoss(weight=None, reduction="mean")
+ self.structure_weight = structure_weight
+ self.loc_weight = loc_weight
+ self.loc_loss = loc_loss
+ self.eps = 1e-12
+
+ def forward(self, predicts, batch):
+ structure_probs = predicts["structure_probs"]
+ structure_targets = batch[1].astype("int64")
+ max_len = batch[-2].max()
+ structure_targets = structure_targets[:, 1 : max_len + 2]
+
+ structure_loss = self.loss_func(structure_probs, structure_targets)
+
+ structure_loss = paddle.mean(structure_loss) * self.structure_weight
+
+ loc_preds = predicts["loc_preds"]
+ loc_targets = batch[2].astype("float32")
+ loc_targets_mask = batch[3].astype("float32")
+ loc_targets = loc_targets[:, 1 : max_len + 2]
+ loc_targets_mask = loc_targets_mask[:, 1 : max_len + 2]
+
+ loc_loss = (
+ F.smooth_l1_loss(
+ loc_preds * loc_targets_mask,
+ loc_targets * loc_targets_mask,
+ reduction="sum",
+ )
+ * self.loc_weight
+ )
+
+ loc_loss = loc_loss / (loc_targets_mask.sum() + self.eps)
+ total_loss = structure_loss + loc_loss
+ return {
+ "loss": total_loss,
+ "structure_loss": structure_loss,
+ "loc_loss": loc_loss,
+ }
diff --git a/docling_ibm_models/slanet_1m/metrics/__init__.py b/docling_ibm_models/slanet_1m/metrics/__init__.py
new file mode 100644
index 0000000..83ddcd7
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/metrics/__init__.py
@@ -0,0 +1,54 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+
+__all__ = ["build_metric"]
+
+from .det_metric import DetMetric, DetFCEMetric
+from .rec_metric import RecMetric, CNTMetric, CANMetric
+from .table_metric import TableMetric
+
+
+
+def build_metric(config):
+ support_dict = [
+ "DetMetric",
+ "DetFCEMetric",
+ "RecMetric",
+ "ClsMetric",
+ "E2EMetric",
+ "DistillationMetric",
+ "TableMetric",
+ "KIEMetric",
+ "VQASerTokenMetric",
+ "VQAReTokenMetric",
+ "SRMetric",
+ "CTMetric",
+ "CNTMetric",
+ "CANMetric",
+ ]
+
+ config = copy.deepcopy(config)
+ module_name = config.pop("name")
+ assert module_name in support_dict, Exception(
+ "metric only support {}".format(support_dict)
+ )
+ module_class = eval(module_name)(**config)
+ return module_class
diff --git a/docling_ibm_models/slanet_1m/metrics/det_metric.py b/docling_ibm_models/slanet_1m/metrics/det_metric.py
new file mode 100644
index 0000000..be95ec3
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/metrics/det_metric.py
@@ -0,0 +1,153 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+__all__ = ["DetMetric", "DetFCEMetric"]
+
+from .eval_det_iou import DetectionIoUEvaluator
+
+
+class DetMetric(object):
+ def __init__(self, main_indicator="hmean", **kwargs):
+ self.evaluator = DetectionIoUEvaluator()
+ self.main_indicator = main_indicator
+ self.reset()
+
+ def __call__(self, preds, batch, **kwargs):
+ """
+ batch: a list produced by dataloaders.
+ image: np.ndarray of shape (N, C, H, W).
+ ratio_list: np.ndarray of shape(N,2)
+ polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
+ ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not.
+ preds: a list of dict produced by post process
+ points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
+ """
+ gt_polyons_batch = batch[2]
+ ignore_tags_batch = batch[3]
+ for pred, gt_polyons, ignore_tags in zip(
+ preds, gt_polyons_batch, ignore_tags_batch
+ ):
+ # prepare gt
+ gt_info_list = [
+ {"points": gt_polyon, "text": "", "ignore": ignore_tag}
+ for gt_polyon, ignore_tag in zip(gt_polyons, ignore_tags)
+ ]
+ # prepare det
+ det_info_list = [
+ {"points": det_polyon, "text": ""} for det_polyon in pred["points"]
+ ]
+ result = self.evaluator.evaluate_image(gt_info_list, det_info_list)
+ self.results.append(result)
+
+ def get_metric(self):
+ """
+ return metrics {
+ 'precision': 0,
+ 'recall': 0,
+ 'hmean': 0
+ }
+ """
+
+ metrics = self.evaluator.combine_results(self.results)
+ self.reset()
+ return metrics
+
+ def reset(self):
+ self.results = [] # clear results
+
+
+class DetFCEMetric(object):
+ def __init__(self, main_indicator="hmean", **kwargs):
+ self.evaluator = DetectionIoUEvaluator()
+ self.main_indicator = main_indicator
+ self.reset()
+
+ def __call__(self, preds, batch, **kwargs):
+ """
+ batch: a list produced by dataloaders.
+ image: np.ndarray of shape (N, C, H, W).
+ ratio_list: np.ndarray of shape(N,2)
+ polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
+ ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not.
+ preds: a list of dict produced by post process
+ points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
+ """
+ gt_polyons_batch = batch[2]
+ ignore_tags_batch = batch[3]
+
+ for pred, gt_polyons, ignore_tags in zip(
+ preds, gt_polyons_batch, ignore_tags_batch
+ ):
+ # prepare gt
+ gt_info_list = [
+ {"points": gt_polyon, "text": "", "ignore": ignore_tag}
+ for gt_polyon, ignore_tag in zip(gt_polyons, ignore_tags)
+ ]
+ # prepare det
+ det_info_list = [
+ {"points": det_polyon, "text": "", "score": score}
+ for det_polyon, score in zip(pred["points"], pred["scores"])
+ ]
+
+ for score_thr in self.results.keys():
+ det_info_list_thr = [
+ det_info
+ for det_info in det_info_list
+ if det_info["score"] >= score_thr
+ ]
+ result = self.evaluator.evaluate_image(gt_info_list, det_info_list_thr)
+ self.results[score_thr].append(result)
+
+ def get_metric(self):
+ """
+        return metrics {'hmean': 0,
+ 'thr 0.3':'precision: 0 recall: 0 hmean: 0',
+ 'thr 0.4':'precision: 0 recall: 0 hmean: 0',
+ 'thr 0.5':'precision: 0 recall: 0 hmean: 0',
+ 'thr 0.6':'precision: 0 recall: 0 hmean: 0',
+ 'thr 0.7':'precision: 0 recall: 0 hmean: 0',
+ 'thr 0.8':'precision: 0 recall: 0 hmean: 0',
+ 'thr 0.9':'precision: 0 recall: 0 hmean: 0',
+ }
+ """
+ metrics = {}
+ hmean = 0
+ for score_thr in self.results.keys():
+ metric = self.evaluator.combine_results(self.results[score_thr])
+ # for key, value in metric.items():
+ # metrics['{}_{}'.format(key, score_thr)] = value
+ metric_str = "precision:{:.5f} recall:{:.5f} hmean:{:.5f}".format(
+ metric["precision"], metric["recall"], metric["hmean"]
+ )
+ metrics["thr {}".format(score_thr)] = metric_str
+ hmean = max(hmean, metric["hmean"])
+ metrics["hmean"] = hmean
+
+ self.reset()
+ return metrics
+
+ def reset(self):
+ self.results = {
+ 0.3: [],
+ 0.4: [],
+ 0.5: [],
+ 0.6: [],
+ 0.7: [],
+ 0.8: [],
+ 0.9: [],
+ } # clear results
diff --git a/docling_ibm_models/slanet_1m/metrics/eval_det_iou.py b/docling_ibm_models/slanet_1m/metrics/eval_det_iou.py
new file mode 100644
index 0000000..4ecce53
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/metrics/eval_det_iou.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from collections import namedtuple
+import numpy as np
+from shapely.geometry import Polygon
+
+"""
+reference from:
+https://github.com/MhLiao/DB/blob/3c32b808d4412680310d3d28eeb6a2d5bf1566c5/concern/icdar2015_eval/detection/iou.py#L8
+"""
+
+
+class DetectionIoUEvaluator(object):
+ def __init__(self, iou_constraint=0.5, area_precision_constraint=0.5):
+ self.iou_constraint = iou_constraint
+ self.area_precision_constraint = area_precision_constraint
+
+ def evaluate_image(self, gt, pred):
+ def get_union(pD, pG):
+ return Polygon(pD).union(Polygon(pG)).area
+
+ def get_intersection_over_union(pD, pG):
+ return get_intersection(pD, pG) / get_union(pD, pG)
+
+ def get_intersection(pD, pG):
+ return Polygon(pD).intersection(Polygon(pG)).area
+
+ def compute_ap(confList, matchList, numGtCare):
+ correct = 0
+ AP = 0
+ if len(confList) > 0:
+ confList = np.array(confList)
+ matchList = np.array(matchList)
+ sorted_ind = np.argsort(-confList)
+ confList = confList[sorted_ind]
+ matchList = matchList[sorted_ind]
+ for n in range(len(confList)):
+ match = matchList[n]
+ if match:
+ correct += 1
+ AP += float(correct) / (n + 1)
+
+ if numGtCare > 0:
+ AP /= numGtCare
+
+ return AP
+
+ perSampleMetrics = {}
+
+ matchedSum = 0
+
+ Rectangle = namedtuple("Rectangle", "xmin ymin xmax ymax")
+
+ numGlobalCareGt = 0
+ numGlobalCareDet = 0
+
+ arrGlobalConfidences = []
+ arrGlobalMatches = []
+
+ recall = 0
+ precision = 0
+ hmean = 0
+
+ detMatched = 0
+
+ iouMat = np.empty([1, 1])
+
+ gtPols = []
+ detPols = []
+
+ gtPolPoints = []
+ detPolPoints = []
+
+ # Array of Ground Truth Polygons' keys marked as don't Care
+ gtDontCarePolsNum = []
+ # Array of Detected Polygons' matched with a don't Care GT
+ detDontCarePolsNum = []
+
+ pairs = []
+ detMatchedNums = []
+
+ arrSampleConfidences = []
+ arrSampleMatch = []
+
+ evaluationLog = ""
+
+ for n in range(len(gt)):
+ points = gt[n]["points"]
+ dontCare = gt[n]["ignore"]
+ if not Polygon(points).is_valid:
+ continue
+
+ gtPol = points
+ gtPols.append(gtPol)
+ gtPolPoints.append(points)
+ if dontCare:
+ gtDontCarePolsNum.append(len(gtPols) - 1)
+
+ evaluationLog += (
+ "GT polygons: "
+ + str(len(gtPols))
+ + (
+ " (" + str(len(gtDontCarePolsNum)) + " don't care)\n"
+ if len(gtDontCarePolsNum) > 0
+ else "\n"
+ )
+ )
+
+ for n in range(len(pred)):
+ points = pred[n]["points"]
+ if not Polygon(points).is_valid:
+ continue
+
+ detPol = points
+ detPols.append(detPol)
+ detPolPoints.append(points)
+ if len(gtDontCarePolsNum) > 0:
+ for dontCarePol in gtDontCarePolsNum:
+ dontCarePol = gtPols[dontCarePol]
+ intersected_area = get_intersection(dontCarePol, detPol)
+ pdDimensions = Polygon(detPol).area
+ precision = (
+ 0 if pdDimensions == 0 else intersected_area / pdDimensions
+ )
+ if precision > self.area_precision_constraint:
+ detDontCarePolsNum.append(len(detPols) - 1)
+ break
+
+ evaluationLog += (
+ "DET polygons: "
+ + str(len(detPols))
+ + (
+ " (" + str(len(detDontCarePolsNum)) + " don't care)\n"
+ if len(detDontCarePolsNum) > 0
+ else "\n"
+ )
+ )
+
+ if len(gtPols) > 0 and len(detPols) > 0:
+            # Calculate IoU and precision matrices
+ outputShape = [len(gtPols), len(detPols)]
+ iouMat = np.empty(outputShape)
+ gtRectMat = np.zeros(len(gtPols), np.int8)
+ detRectMat = np.zeros(len(detPols), np.int8)
+ for gtNum in range(len(gtPols)):
+ for detNum in range(len(detPols)):
+ pG = gtPols[gtNum]
+ pD = detPols[detNum]
+ iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG)
+
+ for gtNum in range(len(gtPols)):
+ for detNum in range(len(detPols)):
+ if (
+ gtRectMat[gtNum] == 0
+ and detRectMat[detNum] == 0
+ and gtNum not in gtDontCarePolsNum
+ and detNum not in detDontCarePolsNum
+ ):
+ if iouMat[gtNum, detNum] > self.iou_constraint:
+ gtRectMat[gtNum] = 1
+ detRectMat[detNum] = 1
+ detMatched += 1
+ pairs.append({"gt": gtNum, "det": detNum})
+ detMatchedNums.append(detNum)
+ evaluationLog += (
+ "Match GT #"
+ + str(gtNum)
+ + " with Det #"
+ + str(detNum)
+ + "\n"
+ )
+
+ numGtCare = len(gtPols) - len(gtDontCarePolsNum)
+ numDetCare = len(detPols) - len(detDontCarePolsNum)
+ if numGtCare == 0:
+ recall = float(1)
+ precision = float(0) if numDetCare > 0 else float(1)
+ else:
+ recall = float(detMatched) / numGtCare
+ precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare
+
+ hmean = (
+ 0
+ if (precision + recall) == 0
+ else 2.0 * precision * recall / (precision + recall)
+ )
+
+ matchedSum += detMatched
+ numGlobalCareGt += numGtCare
+ numGlobalCareDet += numDetCare
+
+ perSampleMetrics = {
+ "gtCare": numGtCare,
+ "detCare": numDetCare,
+ "detMatched": detMatched,
+ }
+ return perSampleMetrics
+
+ def combine_results(self, results):
+ numGlobalCareGt = 0
+ numGlobalCareDet = 0
+ matchedSum = 0
+ for result in results:
+ numGlobalCareGt += result["gtCare"]
+ numGlobalCareDet += result["detCare"]
+ matchedSum += result["detMatched"]
+
+ methodRecall = (
+ 0 if numGlobalCareGt == 0 else float(matchedSum) / numGlobalCareGt
+ )
+ methodPrecision = (
+ 0 if numGlobalCareDet == 0 else float(matchedSum) / numGlobalCareDet
+ )
+ methodHmean = (
+ 0
+ if methodRecall + methodPrecision == 0
+ else 2 * methodRecall * methodPrecision / (methodRecall + methodPrecision)
+ )
+ methodMetrics = {
+ "precision": methodPrecision,
+ "recall": methodRecall,
+ "hmean": methodHmean,
+ }
+
+ return methodMetrics
+
+
+if __name__ == "__main__":
+ evaluator = DetectionIoUEvaluator()
+ gts = [
+ [
+ {
+ "points": [(0, 0), (1, 0), (1, 1), (0, 1)],
+ "text": 1234,
+ "ignore": False,
+ },
+ {
+ "points": [(2, 2), (3, 2), (3, 3), (2, 3)],
+ "text": 5678,
+ "ignore": False,
+ },
+ ]
+ ]
+ preds = [
+ [
+ {
+ "points": [(0.1, 0.1), (1, 0), (1, 1), (0, 1)],
+ "text": 123,
+ "ignore": False,
+ }
+ ]
+ ]
+ results = []
+ for gt, pred in zip(gts, preds):
+ results.append(evaluator.evaluate_image(gt, pred))
+ metrics = evaluator.combine_results(results)
+ print(metrics)
diff --git a/docling_ibm_models/slanet_1m/metrics/rec_metric.py b/docling_ibm_models/slanet_1m/metrics/rec_metric.py
new file mode 100644
index 0000000..e41dd36
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/metrics/rec_metric.py
@@ -0,0 +1,179 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rapidfuzz.distance import Levenshtein
+from difflib import SequenceMatcher
+
+import numpy as np
+import string
+
+
+class RecMetric(object):
+ def __init__(
+ self, main_indicator="acc", is_filter=False, ignore_space=True, **kwargs
+ ):
+ self.main_indicator = main_indicator
+ self.is_filter = is_filter
+ self.ignore_space = ignore_space
+ self.eps = 1e-5
+ self.reset()
+
+ def _normalize_text(self, text):
+ text = "".join(
+ filter(lambda x: x in (string.digits + string.ascii_letters), text)
+ )
+ return text.lower()
+
+ def __call__(self, pred_label, *args, **kwargs):
+ preds, labels = pred_label
+ correct_num = 0
+ all_num = 0
+ norm_edit_dis = 0.0
+ for (pred, pred_conf), (target, _) in zip(preds, labels):
+ if self.ignore_space:
+ pred = pred.replace(" ", "")
+ target = target.replace(" ", "")
+ if self.is_filter:
+ pred = self._normalize_text(pred)
+ target = self._normalize_text(target)
+ norm_edit_dis += Levenshtein.normalized_distance(pred, target)
+ if pred == target:
+ correct_num += 1
+ all_num += 1
+ self.correct_num += correct_num
+ self.all_num += all_num
+ self.norm_edit_dis += norm_edit_dis
+ return {
+ "acc": correct_num / (all_num + self.eps),
+ "norm_edit_dis": 1 - norm_edit_dis / (all_num + self.eps),
+ }
+
+ def get_metric(self):
+ """
+ return metrics {
+ 'acc': 0,
+ 'norm_edit_dis': 0,
+ }
+ """
+ acc = 1.0 * self.correct_num / (self.all_num + self.eps)
+ norm_edit_dis = 1 - self.norm_edit_dis / (self.all_num + self.eps)
+ self.reset()
+ return {"acc": acc, "norm_edit_dis": norm_edit_dis}
+
+ def reset(self):
+ self.correct_num = 0
+ self.all_num = 0
+ self.norm_edit_dis = 0
+
+
+class CNTMetric(object):
+ def __init__(self, main_indicator="acc", **kwargs):
+ self.main_indicator = main_indicator
+ self.eps = 1e-5
+ self.reset()
+
+ def __call__(self, pred_label, *args, **kwargs):
+ preds, labels = pred_label
+ correct_num = 0
+ all_num = 0
+ for pred, target in zip(preds, labels):
+ if pred == target:
+ correct_num += 1
+ all_num += 1
+ self.correct_num += correct_num
+ self.all_num += all_num
+ return {
+ "acc": correct_num / (all_num + self.eps),
+ }
+
+ def get_metric(self):
+ """
+ return metrics {
+ 'acc': 0,
+ }
+ """
+ acc = 1.0 * self.correct_num / (self.all_num + self.eps)
+ self.reset()
+ return {"acc": acc}
+
+ def reset(self):
+ self.correct_num = 0
+ self.all_num = 0
+
+
+class CANMetric(object):
+    """Word-rate / expression-rate metric for CAN-style math expression recognition.
+
+    `word_rate` is a per-token similarity averaged over the batch;
+    `exp_rate` is the fraction of sequences that match exactly.
+    """
+
+    def __init__(self, main_indicator="exp_rate", **kwargs):
+        self.main_indicator = main_indicator
+        # Epoch-level accumulators (cleared by epoch_reset).
+        self.word_right = []
+        self.exp_right = []
+        self.word_total_length = 0
+        self.exp_total_num = 0
+        # Batch-level rates (cleared by reset).
+        self.word_rate = 0
+        self.exp_rate = 0
+        self.reset()
+        self.epoch_reset()
+
+    def __call__(self, preds, batch, **kwargs):
+        # Any truthy keyword value triggers an epoch reset; with several
+        # kwargs the last one wins -- callers pass a single epoch_reset flag.
+        for k, v in kwargs.items():
+            epoch_reset = v
+            if epoch_reset:
+                self.epoch_reset()
+        word_probs = preds  # presumably (batch, seq, vocab) scores -- TODO confirm
+        word_label, word_label_mask = batch
+        line_right = 0
+        if word_probs is not None:
+            word_pred = word_probs.argmax(2)
+            word_pred = word_pred.cpu().detach().numpy()
+            # Per-sample similarity over the unmasked prefix (mask sum = length):
+            # SequenceMatcher ratio rescaled by (len(gt)+len(pred)) / len(gt) / 2.
+            word_scores = [
+                SequenceMatcher(
+                    None, s1[: int(np.sum(s3))], s2[: int(np.sum(s3))], autojunk=False
+                ).ratio()
+                * (len(s1[: int(np.sum(s3))]) + len(s2[: int(np.sum(s3))]))
+                / len(s1[: int(np.sum(s3))])
+                / 2
+                for s1, s2, s3 in zip(word_label, word_pred, word_label_mask)
+            ]
+            batch_size = len(word_scores)
+            for i in range(batch_size):
+                if word_scores[i] == 1:
+                    line_right += 1
+        # NOTE(review): these lines reference word_scores/batch_size, which only
+        # exist when word_probs is not None -- confirm callers never pass None.
+        self.word_rate = np.mean(word_scores)  # float
+        self.exp_rate = line_right / batch_size  # float
+        exp_length, word_length = word_label.shape[:2]
+        # Weight the batch rates by sequence counts for epoch-level averaging.
+        self.word_right.append(self.word_rate * word_length)
+        self.exp_right.append(self.exp_rate * exp_length)
+        self.word_total_length = self.word_total_length + word_length
+        self.exp_total_num = self.exp_total_num + exp_length
+
+    def get_metric(self):
+        """Return epoch-level {'word_rate', 'exp_rate'}; resets batch-level rates."""
+        cur_word_rate = sum(self.word_right) / self.word_total_length
+        cur_exp_rate = sum(self.exp_right) / self.exp_total_num
+        self.reset()
+        return {"word_rate": cur_word_rate, "exp_rate": cur_exp_rate}
+
+    def reset(self):
+        # Batch-level rates only; epoch accumulators survive until epoch_reset.
+        self.word_rate = 0
+        self.exp_rate = 0
+
+    def epoch_reset(self):
+        self.word_right = []
+        self.exp_right = []
+        self.word_total_length = 0
+        self.exp_total_num = 0
diff --git a/docling_ibm_models/slanet_1m/metrics/table_metric.py b/docling_ibm_models/slanet_1m/metrics/table_metric.py
new file mode 100644
index 0000000..6df2bb1
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/metrics/table_metric.py
@@ -0,0 +1,161 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from metrics.det_metric import DetMetric
+
+
+class TableStructureMetric(object):
+ def __init__(self, main_indicator="acc", eps=1e-6, del_thead_tbody=False, **kwargs):
+ self.main_indicator = main_indicator
+ self.eps = eps
+ self.del_thead_tbody = del_thead_tbody
+ self.reset()
+
+ def __call__(self, pred_label, batch=None, *args, **kwargs):
+ preds, labels = pred_label
+ pred_structure_batch_list = preds["structure_batch_list"]
+ gt_structure_batch_list = labels["structure_batch_list"]
+ correct_num = 0
+ all_num = 0
+ for (pred, pred_conf), target in zip(
+ pred_structure_batch_list, gt_structure_batch_list
+ ):
+ pred_str = "".join(pred)
+ target_str = "".join(target)
+ if self.del_thead_tbody:
+ pred_str = (
+ pred_str.replace("", "")
+ .replace("", "")
+ .replace(" | ", "")
+ .replace("", "")
+ )
+ target_str = (
+ target_str.replace("", "")
+ .replace("", "")
+ .replace("", "")
+ .replace("", "")
+ )
+ if pred_str == target_str:
+ correct_num += 1
+ all_num += 1
+ self.correct_num += correct_num
+ self.all_num += all_num
+
+ def get_metric(self):
+ """
+ return metrics {
+ 'acc': 0,
+ }
+ """
+ acc = 1.0 * self.correct_num / (self.all_num + self.eps)
+ self.reset()
+ return {"acc": acc}
+
+ def reset(self):
+ self.correct_num = 0
+ self.all_num = 0
+ self.len_acc_num = 0
+ self.token_nums = 0
+ self.anys_dict = dict()
+
+
+class TableMetric(object):
+    def __init__(
+        self,
+        main_indicator="acc",
+        compute_bbox_metric=False,
+        box_format="xyxy",
+        del_thead_tbody=False,
+        **kwargs,
+    ):
+        """Combined table metric: structure accuracy plus optional cell-bbox detection.
+
+        @param main_indicator: main_matric for save best_model; if it equals the
+            bbox metric's main indicator, bbox metrics become the primary output
+        @param compute_bbox_metric: also score cell boxes with DetMetric
+        @param box_format: input box layout -- "xyxy", "xywh" or "xyxyxyxy"
+        @param del_thead_tbody: forwarded to TableStructureMetric
+        @param kwargs: unused extra config keys
+        """
+        self.structure_metric = TableStructureMetric(del_thead_tbody=del_thead_tbody)
+        self.bbox_metric = DetMetric() if compute_bbox_metric else None
+        self.main_indicator = main_indicator
+        self.box_format = box_format
+        self.reset()
+
+    def __call__(self, pred_label, batch=None, *args, **kwargs):
+        # Accumulate structure accuracy; optionally also bbox detection stats.
+        self.structure_metric(pred_label)
+        if self.bbox_metric is not None:
+            self.bbox_metric(*self.prepare_bbox_metric_input(pred_label))
+
+    def prepare_bbox_metric_input(self, pred_label):
+        # Convert (preds, labels) into the (preds, batch) pair DetMetric expects.
+        # NOTE(review): the gt "batch" uses placeholder zeros for the first two
+        # slots -- presumably DetMetric only reads indices 2 and 3; confirm.
+        pred_bbox_batch_list = []
+        gt_ignore_tags_batch_list = []
+        gt_bbox_batch_list = []
+        preds, labels = pred_label
+
+        batch_num = len(preds["bbox_batch_list"])
+        for batch_idx in range(batch_num):
+            # pred: every box normalized to a 4-point polygon
+            pred_bbox_list = [
+                self.format_box(pred_box)
+                for pred_box in preds["bbox_batch_list"][batch_idx]
+            ]
+            pred_bbox_batch_list.append({"points": pred_bbox_list})
+
+            # gt: all boxes scored (ignore tag 0 for each)
+            gt_bbox_list = []
+            gt_ignore_tags_list = []
+            for gt_box in labels["bbox_batch_list"][batch_idx]:
+                gt_bbox_list.append(self.format_box(gt_box))
+                gt_ignore_tags_list.append(0)
+            gt_bbox_batch_list.append(gt_bbox_list)
+            gt_ignore_tags_batch_list.append(gt_ignore_tags_list)
+
+        return [
+            pred_bbox_batch_list,
+            [0, 0, gt_bbox_batch_list, gt_ignore_tags_batch_list],
+        ]
+
+    def get_metric(self):
+        # Merge the two metric families: the one matching main_indicator keeps
+        # its plain keys; the other family's keys get a disambiguating prefix.
+        structure_metric = self.structure_metric.get_metric()
+        if self.bbox_metric is None:
+            return structure_metric
+        bbox_metric = self.bbox_metric.get_metric()
+        if self.main_indicator == self.bbox_metric.main_indicator:
+            output = bbox_metric
+            for sub_key in structure_metric:
+                output["structure_metric_{}".format(sub_key)] = structure_metric[
+                    sub_key
+                ]
+        else:
+            output = structure_metric
+            for sub_key in bbox_metric:
+                output["bbox_metric_{}".format(sub_key)] = bbox_metric[sub_key]
+        return output
+
+    def reset(self):
+        self.structure_metric.reset()
+        if self.bbox_metric is not None:
+            self.bbox_metric.reset()
+
+    def format_box(self, box):
+        # Normalize any supported box layout to a 4-point polygon [[x, y] * 4].
+        if self.box_format == "xyxy":
+            x1, y1, x2, y2 = box
+            box = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
+        elif self.box_format == "xywh":
+            # (x, y) is the box center; // keeps integer coordinates integral
+            x, y, w, h = box
+            x1, y1, x2, y2 = x - w // 2, y - h // 2, x + w // 2, y + h // 2
+            box = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
+        elif self.box_format == "xyxyxyxy":
+            x1, y1, x2, y2, x3, y3, x4, y4 = box
+            box = [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
+        return box
diff --git a/docling_ibm_models/slanet_1m/model_final/inference.pdiparams b/docling_ibm_models/slanet_1m/model_final/inference.pdiparams
new file mode 100644
index 0000000..88e0aed
Binary files /dev/null and b/docling_ibm_models/slanet_1m/model_final/inference.pdiparams differ
diff --git a/docling_ibm_models/slanet_1m/model_final/inference.pdiparams.info b/docling_ibm_models/slanet_1m/model_final/inference.pdiparams.info
new file mode 100644
index 0000000..dee1aaf
Binary files /dev/null and b/docling_ibm_models/slanet_1m/model_final/inference.pdiparams.info differ
diff --git a/docling_ibm_models/slanet_1m/model_final/inference.pdmodel b/docling_ibm_models/slanet_1m/model_final/inference.pdmodel
new file mode 100644
index 0000000..5f8e979
Binary files /dev/null and b/docling_ibm_models/slanet_1m/model_final/inference.pdmodel differ
diff --git a/docling_ibm_models/slanet_1m/model_final/inference.yml b/docling_ibm_models/slanet_1m/model_final/inference.yml
new file mode 100644
index 0000000..b83eae1
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/model_final/inference.yml
@@ -0,0 +1,72 @@
+PreProcess:
+ transform_ops:
+ - DecodeImage:
+ channel_first: false
+ img_mode: BGR
+ - TableLabelEncode:
+ learn_empty_box: false
+ loc_reg_num: 4
+ max_text_length: 500
+ merge_no_span_structure: true
+ replace_empty_cell_token: false
+ - TableBoxEncode:
+ in_box_format: xyxy
+ out_box_format: xyxy
+ - ResizeTableImage:
+ max_len: 488
+ - NormalizeImage:
+ mean:
+ - 0.485
+ - 0.456
+ - 0.406
+ order: hwc
+ scale: 1./255.
+ std:
+ - 0.229
+ - 0.224
+ - 0.225
+ - PaddingTableImage:
+ size:
+ - 488
+ - 488
+ - ToCHWImage: null
+ - KeepKeys:
+ keep_keys:
+ - image
+ - structure
+ - bboxes
+ - bbox_masks
+ - length
+ - shape
+PostProcess:
+ name: TableLabelDecode
+ merge_no_span_structure: true
+ character_dict:
+ -
+ -
+ -
+ - |
+ -
+ -
+ -
+ -
+ - '
+ - ' colspan="2"'
+ - ' colspan="3"'
+ - ' rowspan="2"'
+ - ' colspan="4"'
+ - ' colspan="6"'
+ - ' rowspan="3"'
+ - ' colspan="9"'
+ - ' colspan="10"'
+ - ' colspan="7"'
+ - ' rowspan="4"'
+ - ' rowspan="5"'
+ - ' rowspan="9"'
+ - ' colspan="8"'
+ - ' rowspan="8"'
+ - ' rowspan="6"'
+ - ' rowspan="7"'
+ - ' rowspan="10"'
diff --git a/docling_ibm_models/slanet_1m/modeling/architectures/__init__.py b/docling_ibm_models/slanet_1m/modeling/architectures/__init__.py
new file mode 100644
index 0000000..50260f9
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/architectures/__init__.py
@@ -0,0 +1,115 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import importlib
+
+from paddle.jit import to_static
+from paddle.static import InputSpec
+
+from .base_model import BaseModel
+from .distillation_model import DistillationModel
+
+__all__ = ["build_model", "apply_to_static"]
+
+
+def build_model(config):
+ config = copy.deepcopy(config)
+ if not "name" in config:
+ arch = BaseModel(config)
+ else:
+ name = config.pop("name")
+ mod = importlib.import_module(__name__)
+ arch = getattr(mod, name)(config)
+ return arch
+
+
+def apply_to_static(model, config, logger):
+    """Wrap `model` with paddle.jit.to_static when Global.to_static is enabled.
+
+    Builds the per-algorithm InputSpec list (image spec plus label/target
+    specs) and returns the converted model; returns the model unchanged when
+    to_static is off. Raises AssertionError for unsupported algorithms or a
+    missing d2s_train_image_shape.
+    """
+    if config["Global"].get("to_static", False) is not True:
+        return model
+    assert (
+        "d2s_train_image_shape" in config["Global"]
+    ), "d2s_train_image_shape must be assigned for static training mode..."
+    supported_list = ["DB", "SVTR_LCNet", "TableMaster", "LayoutXLM", "SLANet", "SVTR"]
+    # For distillation configs the algorithm lives on the first sub-model.
+    if config["Architecture"]["algorithm"] in ["Distillation"]:
+        algo = list(config["Architecture"]["Models"].values())[0]["algorithm"]
+    else:
+        algo = config["Architecture"]["algorithm"]
+    assert (
+        algo in supported_list
+    ), f"algorithms that supports static training must in in {supported_list} but got {algo}"
+
+    # First spec: the input image; batch dimension left dynamic (None).
+    specs = [
+        InputSpec([None] + config["Global"]["d2s_train_image_shape"], dtype="float32")
+    ]
+
+    if algo == "SVTR_LCNet":
+        specs.append(
+            [
+                InputSpec([None, config["Global"]["max_text_length"]], dtype="int64"),
+                InputSpec([None, config["Global"]["max_text_length"]], dtype="int64"),
+                InputSpec([None], dtype="int64"),
+                InputSpec([None], dtype="float64"),
+            ]
+        )
+    elif algo == "TableMaster":
+        specs.append(
+            [
+                InputSpec([None, config["Global"]["max_text_length"]], dtype="int64"),
+                InputSpec(
+                    [None, config["Global"]["max_text_length"], 4], dtype="float32"
+                ),
+                InputSpec(
+                    [None, config["Global"]["max_text_length"], 1], dtype="float32"
+                ),
+                InputSpec([None, 6], dtype="float32"),
+            ]
+        )
+    elif algo == "LayoutXLM":
+        # LayoutXLM replaces the image-first spec list entirely.
+        specs = [
+            [
+                InputSpec(shape=[None, 512], dtype="int64"),  # input_ids
+                InputSpec(shape=[None, 512, 4], dtype="int64"),  # bbox
+                InputSpec(shape=[None, 512], dtype="int64"),  # attention_mask
+                InputSpec(shape=[None, 512], dtype="int64"),  # token_type_ids
+                InputSpec(shape=[None, 3, 224, 224], dtype="float32"),  # image
+                InputSpec(shape=[None, 512], dtype="int64"),  # label
+            ]
+        ]
+    elif algo == "SLANet":
+        # max_text_length + 2: presumably start/end structure tokens -- TODO confirm
+        specs.append(
+            [
+                InputSpec(
+                    [None, config["Global"]["max_text_length"] + 2], dtype="int64"
+                ),
+                InputSpec(
+                    [None, config["Global"]["max_text_length"] + 2, 4], dtype="float32"
+                ),
+                InputSpec(
+                    [None, config["Global"]["max_text_length"] + 2, 1], dtype="float32"
+                ),
+                InputSpec([None], dtype="int64"),
+                InputSpec([None, 6], dtype="float64"),
+            ]
+        )
+    elif algo == "SVTR":
+        specs.append(
+            [
+                InputSpec([None, config["Global"]["max_text_length"]], dtype="int64"),
+                InputSpec([None], dtype="int64"),
+            ]
+        )
+    model = to_static(model, input_spec=specs)
+    logger.info("Successfully to apply @to_static with specs: {}".format(specs))
+    return model
diff --git a/docling_ibm_models/slanet_1m/modeling/architectures/base_model.py b/docling_ibm_models/slanet_1m/modeling/architectures/base_model.py
new file mode 100644
index 0000000..c1b6116
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/architectures/base_model.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import nn
+from modeling.backbones import build_backbone
+from modeling.necks import build_neck
+from modeling.heads import build_head
+
+__all__ = ["BaseModel"]
+
+
+class BaseModel(nn.Layer):
+    def __init__(self, config):
+        """
+        the module for OCR.
+        args:
+            config (dict): the super parameters for module.
+        """
+        super(BaseModel, self).__init__()
+        in_channels = config.get("in_channels", 3)
+        model_type = config["model_type"]
+        # build transform,
+        # for rec, transform can be TPS, None
+        # for det and cls, transform should be None,
+        # if you make model differently, you can use transform in det and cls
+        # build backbone, backbone is needed for det, rec and cls
+        self.use_transform = False  # no transform stage is ever built here
+        if "Backbone" not in config or config["Backbone"] is None:
+            self.use_backbone = False
+        else:
+            self.use_backbone = True
+            config["Backbone"]["in_channels"] = in_channels
+            self.backbone = build_backbone(config["Backbone"], model_type)
+            # each stage feeds its output channel count to the next stage
+            in_channels = self.backbone.out_channels
+
+        # build neck
+        # for rec, neck can be cnn, rnn or reshape (None)
+        # for det, neck can be FPN, BIFPN and so on.
+        # for cls, neck should be none
+        if "Neck" not in config or config["Neck"] is None:
+            self.use_neck = False
+        else:
+            self.use_neck = True
+            config["Neck"]["in_channels"] = in_channels
+            self.neck = build_neck(config["Neck"])
+            in_channels = self.neck.out_channels
+
+        # build head, head is needed for det, rec and cls
+        if "Head" not in config or config["Head"] is None:
+            self.use_head = False
+        else:
+            self.use_head = True
+            config["Head"]["in_channels"] = in_channels
+            self.head = build_head(config["Head"])
+
+        # when True, training-mode forward() returns all intermediate features
+        self.return_all_feats = config.get("return_all_feats", False)
+
+    def forward(self, x, data=None):
+        # y collects per-stage outputs; final_name tracks the last stage run.
+        y = dict()
+        if self.use_transform:
+            x = self.transform(x)
+        if self.use_backbone:
+            x = self.backbone(x)
+        if isinstance(x, dict):
+            y.update(x)
+        else:
+            y["backbone_out"] = x
+        final_name = "backbone_out"
+        if self.use_neck:
+            x = self.neck(x)
+            if isinstance(x, dict):
+                y.update(x)
+            else:
+                y["neck_out"] = x
+            final_name = "neck_out"
+        if self.use_head:
+            x = self.head(x, targets=data)
+            # for multi head, save ctc neck out for udml
+            if isinstance(x, dict) and "ctc_neck" in x.keys():
+                y["neck_out"] = x["ctc_neck"]
+                y["head_out"] = x
+            elif isinstance(x, dict):
+                y.update(x)
+            else:
+                y["head_out"] = x
+            final_name = "head_out"
+        if self.return_all_feats:
+            if self.training:
+                return y
+            elif isinstance(x, dict):
+                return x
+            else:
+                return {final_name: x}
+        else:
+            return x
diff --git a/docling_ibm_models/slanet_1m/modeling/architectures/distillation_model.py b/docling_ibm_models/slanet_1m/modeling/architectures/distillation_model.py
new file mode 100644
index 0000000..98912d1
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/architectures/distillation_model.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import nn
+from modeling.backbones import build_backbone
+from modeling.necks import build_neck
+from modeling.heads import build_head
+from .base_model import BaseModel
+from paddleocr.ppocr.utils.save_load import load_pretrained_params
+
+__all__ = ["DistillationModel"]
+
+
+class DistillationModel(nn.Layer):
+ def __init__(self, config):
+ """
+ the module for OCR distillation.
+ args:
+ config (dict): the super parameters for module.
+ """
+ super().__init__()
+ self.model_list = []
+ self.model_name_list = []
+ for key in config["Models"]:
+ model_config = config["Models"][key]
+ freeze_params = False
+ pretrained = None
+ if "freeze_params" in model_config:
+ freeze_params = model_config.pop("freeze_params")
+ if "pretrained" in model_config:
+ pretrained = model_config.pop("pretrained")
+ model = BaseModel(model_config)
+ if pretrained is not None:
+ load_pretrained_params(model, pretrained)
+ if freeze_params:
+ for param in model.parameters():
+ param.trainable = False
+ self.model_list.append(self.add_sublayer(key, model))
+ self.model_name_list.append(key)
+
+ def forward(self, x, data=None):
+ result_dict = dict()
+ for idx, model_name in enumerate(self.model_name_list):
+ result_dict[model_name] = self.model_list[idx](x, data)
+ return result_dict
diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/__init__.py b/docling_ibm_models/slanet_1m/modeling/backbones/__init__.py
new file mode 100644
index 0000000..e91813e
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/backbones/__init__.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ["build_backbone"]
+
+
+def build_backbone(config, model_type):
+ if model_type == "table":
+ from .det_pp_lcnet import PPLCNet
+ from .rec_lcnetv3 import PPLCNetV3
+ from .det_pp_lcnet_v2 import PPLCNetV2_base
+
+ support_dict = [
+ "PPLCNet",
+ "PPLCNetV3",
+ "PPLCNetV2_base",
+ ]
+ else:
+ raise NotImplementedError
+
+ module_name = config.pop("name")
+ assert module_name in support_dict, Exception(
+ "when model typs is {}, backbone only support {}".format(
+ model_type, support_dict
+ )
+ )
+ module_class = eval(module_name)(**config)
+ return module_class
diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/det_mobilenet_v3.py b/docling_ibm_models/slanet_1m/modeling/backbones/det_mobilenet_v3.py
new file mode 100644
index 0000000..98db44b
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/backbones/det_mobilenet_v3.py
@@ -0,0 +1,285 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+__all__ = ["MobileNetV3"]
+
+
+def make_divisible(v, divisor=8, min_value=None):
+ if min_value is None:
+ min_value = divisor
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+ if new_v < 0.9 * v:
+ new_v += divisor
+ return new_v
+
+
+class MobileNetV3(nn.Layer):
+    def __init__(
+        self, in_channels=3, model_name="large", scale=0.5, disable_se=False, **kwargs
+    ):
+        """
+        the MobilenetV3 backbone network for detection module.
+        Args:
+            in_channels (int): channels of the input image
+            model_name (str): "large" or "small" variant
+            scale (float): width multiplier; must be a supported scale
+            disable_se (bool): drop all squeeze-excite blocks when True
+        """
+        super(MobileNetV3, self).__init__()
+
+        self.disable_se = disable_se
+
+        if model_name == "large":
+            cfg = [
+                # k, exp, c, se, nl, s,
+                [3, 16, 16, False, "relu", 1],
+                [3, 64, 24, False, "relu", 2],
+                [3, 72, 24, False, "relu", 1],
+                [5, 72, 40, True, "relu", 2],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1],
+                [3, 240, 80, False, "hardswish", 2],
+                [3, 200, 80, False, "hardswish", 1],
+                [3, 184, 80, False, "hardswish", 1],
+                [3, 184, 80, False, "hardswish", 1],
+                [3, 480, 112, True, "hardswish", 1],
+                [3, 672, 112, True, "hardswish", 1],
+                [5, 672, 160, True, "hardswish", 2],
+                [5, 960, 160, True, "hardswish", 1],
+                [5, 960, 160, True, "hardswish", 1],
+            ]
+            cls_ch_squeeze = 960
+        elif model_name == "small":
+            cfg = [
+                # k, exp, c, se, nl, s,
+                [3, 16, 16, True, "relu", 2],
+                [3, 72, 24, False, "relu", 2],
+                [3, 88, 24, False, "relu", 1],
+                [5, 96, 40, True, "hardswish", 2],
+                [5, 240, 40, True, "hardswish", 1],
+                [5, 240, 40, True, "hardswish", 1],
+                [5, 120, 48, True, "hardswish", 1],
+                [5, 144, 48, True, "hardswish", 1],
+                [5, 288, 96, True, "hardswish", 2],
+                [5, 576, 96, True, "hardswish", 1],
+                [5, 576, 96, True, "hardswish", 1],
+            ]
+            cls_ch_squeeze = 576
+        else:
+            raise NotImplementedError(
+                "mode[" + model_name + "_model] is not implemented!"
+            )
+
+        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
+        assert (
+            scale in supported_scale
+        ), "supported scale are {} but input scale is {}".format(supported_scale, scale)
+        inplanes = 16
+        # conv1: stem conv, downsamples by 2
+        self.conv = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=make_divisible(inplanes * scale),
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            groups=1,
+            if_act=True,
+            act="hardswish",
+        )
+
+        self.stages = []
+        self.out_channels = []
+        block_list = []
+        i = 0
+        inplanes = make_divisible(inplanes * scale)
+        for k, exp, c, se, nl, s in cfg:
+            se = se and not self.disable_se
+            # Split stages at each stride-2 block (after the first few blocks
+            # for the "large" variant) so forward() can emit one feature map
+            # per resolution level.
+            start_idx = 2 if model_name == "large" else 0
+            if s == 2 and i > start_idx:
+                self.out_channels.append(inplanes)
+                self.stages.append(nn.Sequential(*block_list))
+                block_list = []
+            block_list.append(
+                ResidualUnit(
+                    in_channels=inplanes,
+                    mid_channels=make_divisible(scale * exp),
+                    out_channels=make_divisible(scale * c),
+                    kernel_size=k,
+                    stride=s,
+                    use_se=se,
+                    act=nl,
+                )
+            )
+            inplanes = make_divisible(scale * c)
+            i += 1
+        # Final 1x1 conv expands to the classification squeeze width.
+        block_list.append(
+            ConvBNLayer(
+                in_channels=inplanes,
+                out_channels=make_divisible(scale * cls_ch_squeeze),
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                groups=1,
+                if_act=True,
+                act="hardswish",
+            )
+        )
+        self.stages.append(nn.Sequential(*block_list))
+        self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
+        # Register each stage so its parameters are tracked by the Layer.
+        for i, stage in enumerate(self.stages):
+            self.add_sublayer(sublayer=stage, name="stage{}".format(i))
+
+    def forward(self, x):
+        # Returns one feature map per stage (multi-scale outputs for FPN-like necks).
+        x = self.conv(x)
+        out_list = []
+        for stage in self.stages:
+            x = stage(x)
+            out_list.append(x)
+        return out_list
+
+
+class ConvBNLayer(nn.Layer):
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ padding,
+ groups=1,
+ if_act=True,
+ act=None,
+ ):
+ super(ConvBNLayer, self).__init__()
+ self.if_act = if_act
+ self.act = act
+ self.conv = nn.Conv2D(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ groups=groups,
+ bias_attr=False,
+ )
+
+ self.bn = nn.BatchNorm(num_channels=out_channels, act=None)
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ if self.if_act:
+ if self.act == "relu":
+ x = F.relu(x)
+ elif self.act == "hardswish":
+ x = F.hardswish(x)
+ else:
+ print(
+ "The activation function({}) is selected incorrectly.".format(
+ self.act
+ )
+ )
+ exit()
+ return x
+
+
+class ResidualUnit(nn.Layer):
+ def __init__(
+ self,
+ in_channels,
+ mid_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ use_se,
+ act=None,
+ ):
+ super(ResidualUnit, self).__init__()
+ self.if_shortcut = stride == 1 and in_channels == out_channels
+ self.if_se = use_se
+
+ self.expand_conv = ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=mid_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ if_act=True,
+ act=act,
+ )
+ self.bottleneck_conv = ConvBNLayer(
+ in_channels=mid_channels,
+ out_channels=mid_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=int((kernel_size - 1) // 2),
+ groups=mid_channels,
+ if_act=True,
+ act=act,
+ )
+ if self.if_se:
+ self.mid_se = SEModule(mid_channels)
+ self.linear_conv = ConvBNLayer(
+ in_channels=mid_channels,
+ out_channels=out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ if_act=False,
+ act=None,
+ )
+
+ def forward(self, inputs):
+ x = self.expand_conv(inputs)
+ x = self.bottleneck_conv(x)
+ if self.if_se:
+ x = self.mid_se(x)
+ x = self.linear_conv(x)
+ if self.if_shortcut:
+ x = paddle.add(inputs, x)
+ return x
+
+
+class SEModule(nn.Layer):
+ def __init__(self, in_channels, reduction=4):
+ super(SEModule, self).__init__()
+ self.avg_pool = nn.AdaptiveAvgPool2D(1)
+ self.conv1 = nn.Conv2D(
+ in_channels=in_channels,
+ out_channels=in_channels // reduction,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+ self.conv2 = nn.Conv2D(
+ in_channels=in_channels // reduction,
+ out_channels=in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+
+ def forward(self, inputs):
+ outputs = self.avg_pool(inputs)
+ outputs = self.conv1(outputs)
+ outputs = F.relu(outputs)
+ outputs = self.conv2(outputs)
+ outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5)
+ return inputs * outputs
diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet.py b/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet.py
new file mode 100644
index 0000000..bf557a4
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet.py
@@ -0,0 +1,274 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import os
+import paddle
+import paddle.nn as nn
+from paddle import ParamAttr
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import KaimingNormal
+from paddle.utils.download import get_path_from_url
+
# URLs of ImageNet-pretrained PP-LCNet weights, keyed by width multiplier
# in the same "x{scale}" format used by PPLCNet._load_pretrained.
MODEL_URLS = {
    "PPLCNet_x0.25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_25_pretrained.pdparams",
    "PPLCNet_x0.35": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_35_pretrained.pdparams",
    "PPLCNet_x0.5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_5_pretrained.pdparams",
    "PPLCNet_x0.75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_75_pretrained.pdparams",
    "PPLCNet_x1.0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_0_pretrained.pdparams",
    "PPLCNet_x1.5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_5_pretrained.pdparams",
    "PPLCNet_x2.0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_0_pretrained.pdparams",
    "PPLCNet_x2.5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_5_pretrained.pdparams",
}

# Ordered names of the sequential stages composing a PPLCNet backbone.
MODEL_STAGES_PATTERN = {
    "PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]
}

__all__ = list(MODEL_URLS.keys())

# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se.
# k: kernel_size
# in_c: input channel number in depthwise block
# out_c: output channel number in depthwise block
# s: stride in depthwise block
# use_se: whether to use SE block

NET_CONFIG = {
    "blocks2":
    # k, in_c, out_c, s, use_se
    [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
    "blocks5": [
        [3, 128, 256, 2, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    # SE blocks only in the final, narrowest-resolution stage.
    "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]],
}
+
+
def make_divisible(v, divisor=8, min_value=None):
    """Round *v* to the nearest multiple of *divisor*.

    The result never goes below *min_value* (defaults to *divisor*) and
    never drops more than 10% below *v*.
    """
    floor = divisor if min_value is None else min_value
    rounded = int(v + divisor / 2) // divisor * divisor
    new_v = max(floor, rounded)
    # Make sure that rounding down does not shrink the value by over 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
+
+
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2D (Kaiming init) + BatchNorm + Hardswish activation."""

    def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1):
        super().__init__()

        same_padding = (filter_size - 1) // 2
        self.conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=same_padding,
            groups=num_groups,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
            bias_attr=False,
        )

        # BN scale/shift are excluded from weight decay.
        self.bn = BatchNorm(
            num_filters,
            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)),
        )
        self.hardswish = nn.Hardswish()

    def forward(self, x):
        return self.hardswish(self.bn(self.conv(x)))
+
+
class DepthwiseSeparable(nn.Layer):
    """Depthwise conv -> optional SE gate -> pointwise (1x1) conv."""

    def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False):
        super().__init__()
        self.use_se = use_se
        # Depthwise stage: one group per channel.
        self.dw_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_channels,
            filter_size=dw_size,
            stride=stride,
            num_groups=num_channels,
        )
        if use_se:
            self.se = SEModule(num_channels)
        # Pointwise stage projects to the target channel count.
        self.pw_conv = ConvBNLayer(
            num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1
        )

    def forward(self, x):
        y = self.dw_conv(x)
        if self.use_se:
            y = self.se(y)
        return self.pw_conv(y)
+
+
class SEModule(nn.Layer):
    """Squeeze-and-Excitation gate using module-style ReLU and Hardsigmoid."""

    def __init__(self, channel, reduction=4):
        super().__init__()
        mid_channel = channel // reduction
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=mid_channel,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.relu = nn.ReLU()
        self.conv2 = Conv2D(
            in_channels=mid_channel,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.hardsigmoid = nn.Hardsigmoid()

    def forward(self, x):
        gate = self.avg_pool(x)
        gate = self.relu(self.conv1(gate))
        gate = self.hardsigmoid(self.conv2(gate))
        # Channel-wise reweighting of the block input.
        return paddle.multiply(x=x, y=gate)
+
+
class PPLCNet(nn.Layer):
    """PP-LCNet backbone returning the feature maps of stages 3-6.

    Args:
        in_channels: number of channels of the input image (default 3).
        scale: width multiplier applied to every stage.
        pretrained: if True, download and load ImageNet weights matching
            ``scale`` from MODEL_URLS.
        use_ssld: if True, prefer the ssld-distilled weight variant.
    """

    def __init__(self, in_channels=3, scale=1.0, pretrained=False, use_ssld=False):
        super().__init__()
        self.out_channels = [
            int(NET_CONFIG[key][-1][2] * scale)
            for key in ("blocks3", "blocks4", "blocks5", "blocks6")
        ]
        self.scale = scale

        self.conv1 = ConvBNLayer(
            num_channels=in_channels,
            filter_size=3,
            num_filters=make_divisible(16 * scale),
            stride=2,
        )

        # All five stages share the same construction logic, so build each
        # from its NET_CONFIG entry instead of repeating the comprehension.
        self.blocks2 = self._make_stage(NET_CONFIG["blocks2"], scale)
        self.blocks3 = self._make_stage(NET_CONFIG["blocks3"], scale)
        self.blocks4 = self._make_stage(NET_CONFIG["blocks4"], scale)
        self.blocks5 = self._make_stage(NET_CONFIG["blocks5"], scale)
        self.blocks6 = self._make_stage(NET_CONFIG["blocks6"], scale)

        if pretrained:
            # NOTE: scale must format like the MODEL_URLS keys
            # (e.g. 1.0 -> "PPLCNet_x1.0"); an int scale such as 1 would
            # raise KeyError here.
            self._load_pretrained(
                MODEL_URLS["PPLCNet_x{}".format(scale)], use_ssld=use_ssld
            )

    @staticmethod
    def _make_stage(cfg, scale):
        """Build one stage from a list of (k, in_c, out_c, s, use_se) entries."""
        return nn.Sequential(
            *[
                DepthwiseSeparable(
                    num_channels=make_divisible(in_c * scale),
                    num_filters=make_divisible(out_c * scale),
                    dw_size=k,
                    stride=s,
                    use_se=se,
                )
                for k, in_c, out_c, s, se in cfg
            ]
        )

    def forward(self, x):
        """Return [C3, C4, C5, C6] feature maps (outputs of blocks3..blocks6)."""
        outs = []
        x = self.conv1(x)
        x = self.blocks2(x)
        for stage in (self.blocks3, self.blocks4, self.blocks5, self.blocks6):
            x = stage(x)
            outs.append(x)
        return outs

    def _load_pretrained(self, pretrained_url, use_ssld=False):
        """Download weights (cached under ~/.paddleclas/weights) and load them.

        Args:
            pretrained_url: URL of the .pdparams file.
            use_ssld: rewrite the URL to its ssld-distilled counterpart.
        """
        if use_ssld:
            pretrained_url = pretrained_url.replace("_pretrained", "_ssld_pretrained")
        print(pretrained_url)
        local_weight_path = get_path_from_url(
            pretrained_url, os.path.expanduser("~/.paddleclas/weights")
        )
        param_state_dict = paddle.load(local_weight_path)
        self.set_dict(param_state_dict)
        return
diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet_v2.py b/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet_v2.py
new file mode 100644
index 0000000..5b5a568
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet_v2.py
@@ -0,0 +1,358 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+import os
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Dropout, Linear
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import KaimingNormal
+from paddle.utils.download import get_path_from_url
+
# URLs of the ssld-distilled ImageNet weights for each PP-LCNetV2 size.
MODEL_URLS = {
    "PPLCNetV2_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNetV2_small_ssld_pretrained.pdparams",
    "PPLCNetV2_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNetV2_base_ssld_pretrained.pdparams",
    "PPLCNetV2_large": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNetV2_large_ssld_pretrained.pdparams",
}

__all__ = list(MODEL_URLS.keys())

# One entry per stage; each stage doubles its in_channels on output
# (see PPLCNetV2.__init__).
NET_CONFIG = {
    # in_channels, kernel_size, split_pw, use_rep, use_se, use_shortcut
    "stage1": [64, 3, False, False, False, False],
    "stage2": [128, 3, False, False, False, False],
    "stage3": [256, 5, True, True, True, False],
    "stage4": [512, 5, False, True, False, True],
}
+
+
def make_divisible(v, divisor=8, min_value=None):
    """Round *v* to the nearest multiple of *divisor*, never below
    *min_value* (defaults to *divisor*) and never more than 10% below *v*.
    """
    lower_bound = min_value if min_value is not None else divisor
    new_v = max(lower_bound, int(v + divisor / 2) // divisor * divisor)
    # Rounding down must not remove more than 10% of the original value.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
+
+
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2D (Kaiming init) + BatchNorm2D, with optional ReLU."""

    def __init__(
        self, in_channels, out_channels, kernel_size, stride, groups=1, use_act=True
    ):
        super().__init__()
        self.use_act = use_act
        same_padding = (kernel_size - 1) // 2
        self.conv = Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=same_padding,
            groups=groups,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
            bias_attr=False,
        )

        # BN scale/shift are excluded from weight decay.
        self.bn = BatchNorm2D(
            out_channels,
            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)),
        )
        if self.use_act:
            self.act = nn.ReLU()

    def forward(self, x):
        y = self.bn(self.conv(x))
        return self.act(y) if self.use_act else y
+
+
class SEModule(nn.Layer):
    """Squeeze-and-Excitation gate used by PP-LCNetV2 blocks.

    NOTE(review): the attribute is named ``hardsigmoid`` but it instantiates
    a plain ``nn.Sigmoid`` — confirm against the reference PPLCNetV2
    implementation before renaming or changing the activation.
    """

    def __init__(self, channel, reduction=4):
        super().__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        # Squeeze: 1x1 conv reduces channels by `reduction`.
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.relu = nn.ReLU()
        # Excite: 1x1 conv restores the original channel count.
        self.conv2 = Conv2D(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.hardsigmoid = nn.Sigmoid()

    def forward(self, x):
        identity = x
        x = self.avg_pool(x)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.hardsigmoid(x)
        # Channel-wise reweighting of the block input.
        x = paddle.multiply(x=identity, y=x)
        return x
+
+
class RepDepthwiseSeparable(nn.Layer):
    """Depthwise-separable block with optional structural re-parameterization.

    With ``use_rep`` the depthwise step is a sum of conv-BN branches with
    shrinking kernel sizes (dw_size, dw_size-2, ..., down to 1);
    ``re_parameterize()`` fuses them into the single ``dw_conv`` for
    inference. The pointwise step is either one 1x1 conv or, when
    ``split_pw`` is set, two stacked 1x1 convs through a bottleneck.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        stride,
        dw_size=3,
        split_pw=False,
        use_rep=False,
        use_se=False,
        use_shortcut=False,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.is_repped = False  # set True once branches have been fused

        self.dw_size = dw_size
        self.split_pw = split_pw
        self.use_rep = use_rep
        self.use_se = use_se
        # Shortcut only when shapes match so the residual add is well-defined.
        self.use_shortcut = (
            True
            if use_shortcut and stride == 1 and in_channels == out_channels
            else False
        )

        if self.use_rep:
            self.dw_conv_list = nn.LayerList()
            for kernel_size in range(self.dw_size, 0, -2):
                # A 1x1 branch cannot reproduce a strided kxk output shape.
                if kernel_size == 1 and stride != 1:
                    continue
                dw_conv = ConvBNLayer(
                    in_channels=in_channels,
                    out_channels=in_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    groups=in_channels,
                    use_act=False,
                )
                self.dw_conv_list.append(dw_conv)
            # Fused depthwise conv, populated by re_parameterize().
            self.dw_conv = nn.Conv2D(
                in_channels=in_channels,
                out_channels=in_channels,
                kernel_size=dw_size,
                stride=stride,
                padding=(dw_size - 1) // 2,
                groups=in_channels,
            )
        else:
            self.dw_conv = ConvBNLayer(
                in_channels=in_channels,
                out_channels=in_channels,
                kernel_size=dw_size,
                stride=stride,
                groups=in_channels,
            )

        self.act = nn.ReLU()

        if use_se:
            self.se = SEModule(in_channels)

        if self.split_pw:
            pw_ratio = 0.5
            self.pw_conv_1 = ConvBNLayer(
                in_channels=in_channels,
                kernel_size=1,
                out_channels=int(out_channels * pw_ratio),
                stride=1,
            )
            self.pw_conv_2 = ConvBNLayer(
                in_channels=int(out_channels * pw_ratio),
                kernel_size=1,
                out_channels=out_channels,
                stride=1,
            )
        else:
            self.pw_conv = ConvBNLayer(
                in_channels=in_channels,
                kernel_size=1,
                out_channels=out_channels,
                stride=1,
            )

    def forward(self, x):
        # Bug fix: capture the block input unconditionally. The original
        # only assigned input_x inside the use_rep branch, so configuring
        # use_shortcut=True with use_rep=False raised NameError at the
        # residual add below.
        input_x = x
        if self.use_rep:
            if self.is_repped:
                x = self.act(self.dw_conv(x))
            else:
                # Sum of all kernel-size branches, then a shared ReLU.
                y = self.dw_conv_list[0](x)
                for dw_conv in self.dw_conv_list[1:]:
                    y += dw_conv(x)
                x = self.act(y)
        else:
            x = self.dw_conv(x)

        if self.use_se:
            x = self.se(x)
        if self.split_pw:
            x = self.pw_conv_1(x)
            x = self.pw_conv_2(x)
        else:
            x = self.pw_conv(x)
        if self.use_shortcut:
            x = x + input_x
        return x

    def re_parameterize(self):
        """Fuse the multi-branch depthwise convs into ``self.dw_conv``."""
        if self.use_rep:
            self.is_repped = True
            kernel, bias = self._get_equivalent_kernel_bias()
            self.dw_conv.weight.set_value(kernel)
            self.dw_conv.bias.set_value(bias)

    def _get_equivalent_kernel_bias(self):
        """Sum every branch's BN-folded kernel/bias, zero-padded to dw_size."""
        kernel_sum = 0
        bias_sum = 0
        for dw_conv in self.dw_conv_list:
            kernel, bias = self._fuse_bn_tensor(dw_conv)
            kernel = self._pad_tensor(kernel, to_size=self.dw_size)
            kernel_sum += kernel
            bias_sum += bias
        return kernel_sum, bias_sum

    def _fuse_bn_tensor(self, branch):
        """Fold a ConvBNLayer's BN statistics into an equivalent (kernel, bias)."""
        kernel = branch.conv.weight
        running_mean = branch.bn._mean
        running_var = branch.bn._variance
        gamma = branch.bn.weight
        beta = branch.bn.bias
        eps = branch.bn._epsilon
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape((-1, 1, 1, 1))
        return kernel * t, beta - running_mean * gamma / std

    def _pad_tensor(self, tensor, to_size):
        """Zero-pad a smaller square kernel up to to_size x to_size."""
        from_size = tensor.shape[-1]
        if from_size == to_size:
            return tensor
        pad = (to_size - from_size) // 2
        return F.pad(tensor, [pad, pad, pad, pad])
+
+
class PPLCNetV2(nn.Layer):
    """PP-LCNetV2 backbone emitting the stage outputs selected by out_indx.

    Args:
        scale: width multiplier applied to every stage.
        depths: number of RepDepthwiseSeparable blocks per stage (length 4).
        out_indx: 1-based stage indices whose outputs are returned.
        pretrained: download (cached) and load the ImageNet ssld weights.
            Defaults to True, preserving the original always-load behavior.
    """

    def __init__(self, scale, depths, out_indx=(1, 2, 3, 4), pretrained=True, **kwargs):
        super().__init__(**kwargs)
        self.scale = scale
        # Every stage doubles its configured in_channels on output.
        self.out_channels = [
            int(NET_CONFIG["stage1"][0] * scale * 2),
            int(NET_CONFIG["stage2"][0] * scale * 2),
            int(NET_CONFIG["stage3"][0] * scale * 2),
            int(NET_CONFIG["stage4"][0] * scale * 2),
        ]
        self.stem = nn.Sequential(
            *[
                ConvBNLayer(
                    in_channels=3,
                    kernel_size=3,
                    out_channels=make_divisible(32 * scale),
                    stride=2,
                ),
                RepDepthwiseSeparable(
                    in_channels=make_divisible(32 * scale),
                    out_channels=make_divisible(64 * scale),
                    stride=1,
                    dw_size=3,
                ),
            ]
        )
        self.out_indx = out_indx
        # Stages: the first block of each stage downsamples (stride 2) and
        # doubles the channels; the rest keep the shape.
        self.stages = nn.LayerList()
        for depth_idx, k in enumerate(NET_CONFIG):
            (
                in_channels,
                kernel_size,
                split_pw,
                use_rep,
                use_se,
                use_shortcut,
            ) = NET_CONFIG[k]
            self.stages.append(
                nn.Sequential(
                    *[
                        RepDepthwiseSeparable(
                            in_channels=make_divisible(
                                (in_channels if i == 0 else in_channels * 2) * scale
                            ),
                            out_channels=make_divisible(in_channels * 2 * scale),
                            stride=2 if i == 0 else 1,
                            dw_size=kernel_size,
                            split_pw=split_pw,
                            use_rep=use_rep,
                            use_se=use_se,
                            use_shortcut=use_shortcut,
                        )
                        for i in range(depths[depth_idx])
                    ]
                )
            )

        # Bug fix: loading used to be unconditional (a network download on
        # every construction). It is now gated by `pretrained`, which
        # defaults to True so existing callers keep the old behavior.
        if pretrained:
            self._load_pretrained(MODEL_URLS["PPLCNetV2_base"], use_ssld=True)

    def forward(self, x):
        """Run stem + stages, collecting outputs of the requested stages."""
        x = self.stem(x)
        outs = []
        for stage_id, stage in enumerate(self.stages, start=1):
            x = stage(x)
            if stage_id in self.out_indx:
                outs.append(x)
        return outs

    def _load_pretrained(self, pretrained_url, use_ssld=False):
        """Download weights (cached under ~/.paddleclas/weights) and load them.

        Note: `use_ssld` is accepted for signature parity with the V1
        backbone but is unused here — the URLs already point at ssld weights.
        """
        print(pretrained_url)
        local_weight_path = get_path_from_url(
            pretrained_url, os.path.expanduser("~/.paddleclas/weights")
        )
        param_state_dict = paddle.load(local_weight_path)
        self.set_dict(param_state_dict)
        print("load pretrain ssld success!")
        return
+
+
def PPLCNetV2_base(in_channels=3, **kwargs):
    """Construct a PPLCNetV2 backbone at scale 1.0 with depths [2, 2, 6, 2].

    Args:
        in_channels: accepted for API symmetry with other backbones;
            currently not forwarded to the PPLCNetV2 constructor.
        **kwargs: passed through to ``PPLCNetV2`` (e.g. ``out_indx``).

    Returns:
        nn.Layer: the constructed ``PPLCNetV2_base`` model.
    """
    return PPLCNetV2(scale=1.0, depths=[2, 2, 6, 2], **kwargs)
diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/rec_lcnetv3.py b/docling_ibm_models/slanet_1m/modeling/backbones/rec_lcnetv3.py
new file mode 100644
index 0000000..b54670c
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/backbones/rec_lcnetv3.py
@@ -0,0 +1,554 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn.initializer import Constant, KaimingNormal
+from paddle.nn import (
+ AdaptiveAvgPool2D,
+ BatchNorm2D,
+ Conv2D,
+ Dropout,
+ Hardsigmoid,
+ Hardswish,
+ Identity,
+ Linear,
+ ReLU,
+)
+from paddle.regularizer import L2Decay
+
# Stage configuration for the detection variant of PP-LCNetV3.
# Each inner list is one block: k, in_c, out_c, s, use_se.
NET_CONFIG_det = {
    "blocks2":
    # k, in_c, out_c, s, use_se
    [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
    "blocks5": [
        [3, 128, 256, 2, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, 2, True],
        [5, 512, 512, 1, True],
        [5, 512, 512, 1, False],
        [5, 512, 512, 1, False],
    ],
}

# Stage configuration for the recognition variant: same widths as the
# detection config, but several blocks use tuple strides such as (2, 1)
# to downsample height faster than width for text-line inputs.
NET_CONFIG_rec = {
    "blocks2":
    # k, in_c, out_c, s, use_se
    [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]],
    "blocks5": [
        [3, 128, 256, (1, 2), False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, (2, 1), True],
        [5, 512, 512, 1, True],
        [5, 512, 512, (2, 1), False],
        [5, 512, 512, 1, False],
    ],
}
+
+
def make_divisible(v, divisor=16, min_value=None):
    """Round *v* to the nearest multiple of *divisor* (16 here, unlike the
    V1/V2 backbones which use 8), never below *min_value* and never more
    than 10% below *v*.
    """
    floor = min_value if min_value is not None else divisor
    candidate = int(v + divisor / 2) // divisor * divisor
    new_v = max(floor, candidate)
    # Do not let rounding shrink the value by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
+
+
class LearnableAffineBlock(nn.Layer):
    """Learnable scalar affine transform: y = scale * x + bias.

    Both parameters are single-element tensors trained with learning rate
    ``lr_mult * lab_lr``.
    """

    def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        self.scale = self.create_parameter(
            shape=[
                1,
            ],
            default_initializer=Constant(value=scale_value),
            attr=ParamAttr(learning_rate=lr_mult * lab_lr),
        )
        # NOTE(review): assigning the created parameter to an attribute
        # should already register it; add_parameter looks redundant but is
        # kept as-is — confirm before removing.
        self.add_parameter("scale", self.scale)
        self.bias = self.create_parameter(
            shape=[
                1,
            ],
            default_initializer=Constant(value=bias_value),
            attr=ParamAttr(learning_rate=lr_mult * lab_lr),
        )
        self.add_parameter("bias", self.bias)

    def forward(self, x):
        return self.scale * x + self.bias
+
+
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2D (Kaiming init) + BatchNorm2D; no activation.

    ``lr_mult`` scales the learning rate of both the conv and BN parameters.
    """

    def __init__(
        self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0
    ):
        super().__init__()
        same_padding = (kernel_size - 1) // 2
        self.conv = Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=same_padding,
            groups=groups,
            weight_attr=ParamAttr(initializer=KaimingNormal(), learning_rate=lr_mult),
            bias_attr=False,
        )

        # BN scale/shift are excluded from weight decay.
        self.bn = BatchNorm2D(
            out_channels,
            weight_attr=ParamAttr(regularizer=L2Decay(0.0), learning_rate=lr_mult),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0), learning_rate=lr_mult),
        )

    def forward(self, x):
        return self.bn(self.conv(x))
+
+
class Act(nn.Layer):
    """Activation ("hswish" or "relu") followed by a LearnableAffineBlock."""

    def __init__(self, act="hswish", lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        if act == "hswish":
            self.act = Hardswish()
        else:
            # Only the two activations used by LCNetV3 are supported.
            assert act == "relu"
            self.act = ReLU()
        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)

    def forward(self, x):
        activated = self.act(x)
        return self.lab(activated)
+
+
class LearnableRepLayer(nn.Layer):
    """Re-parameterizable conv layer with a learnable affine block.

    Training time: the output is the sum of an identity (BN-only) branch,
    an optional 1x1 conv-BN branch, and ``num_conv_branches`` kxk conv-BN
    branches. Inference/export time: ``rep()`` fuses all branches into a
    single ``reparam_conv``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        groups=1,
        num_conv_branches=1,
        lr_mult=1.0,
        lab_lr=0.1,
    ):
        super().__init__()
        self.is_repped = False  # flips to True after rep() fuses the branches
        self.groups = groups
        self.stride = stride
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_conv_branches = num_conv_branches
        self.padding = (kernel_size - 1) // 2

        # Identity (BN-only) branch exists only when a skip is shape-legal.
        self.identity = (
            BatchNorm2D(
                num_features=in_channels,
                weight_attr=ParamAttr(learning_rate=lr_mult),
                bias_attr=ParamAttr(learning_rate=lr_mult),
            )
            if out_channels == in_channels and stride == 1
            else None
        )

        self.conv_kxk = nn.LayerList(
            [
                ConvBNLayer(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride,
                    groups=groups,
                    lr_mult=lr_mult,
                )
                for _ in range(self.num_conv_branches)
            ]
        )

        # Extra 1x1 branch only for kxk (k > 1) layers.
        self.conv_1x1 = (
            ConvBNLayer(
                in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult
            )
            if kernel_size > 1
            else None
        )

        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
        self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)

    def forward(self, x):
        # for export
        if self.is_repped:
            out = self.lab(self.reparam_conv(x))
            if self.stride != 2:
                out = self.act(out)
            return out

        # Sum of all live branches, then the learnable affine.
        out = 0
        if self.identity is not None:
            out += self.identity(x)

        if self.conv_1x1 is not None:
            out += self.conv_1x1(x)

        for conv in self.conv_kxk:
            out += conv(x)

        out = self.lab(out)
        # Stride-2 layers skip the activation here (mirrors the repped path).
        if self.stride != 2:
            out = self.act(out)
        return out

    def rep(self):
        """Fuse all branches into a single conv for inference/export."""
        if self.is_repped:
            return
        kernel, bias = self._get_kernel_bias()
        self.reparam_conv = Conv2D(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            groups=self.groups,
        )
        self.reparam_conv.weight.set_value(kernel)
        self.reparam_conv.bias.set_value(bias)
        self.is_repped = True

    def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad):
        # An absent branch is represented by the scalar 0 and contributes
        # nothing to the fused kernel.
        if not isinstance(kernel1x1, paddle.Tensor):
            return 0
        else:
            return nn.functional.pad(kernel1x1, [pad, pad, pad, pad])

    def _get_kernel_bias(self):
        """Return the (kernel, bias) equivalent to the sum of all branches."""
        kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
        kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(
            kernel_conv_1x1, self.kernel_size // 2
        )

        kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)

        kernel_conv_kxk = 0
        bias_conv_kxk = 0
        for conv in self.conv_kxk:
            kernel, bias = self._fuse_bn_tensor(conv)
            kernel_conv_kxk += kernel
            bias_conv_kxk += bias

        kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity
        bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity
        return kernel_reparam, bias_reparam

    def _fuse_bn_tensor(self, branch):
        """Fold a branch's BN statistics into an equivalent (kernel, bias);
        a missing branch (None) contributes (0, 0)."""
        if not branch:
            return 0, 0
        elif isinstance(branch, ConvBNLayer):
            kernel = branch.conv.weight
            running_mean = branch.bn._mean
            running_var = branch.bn._variance
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn._epsilon
        else:
            assert isinstance(branch, BatchNorm2D)
            # Build (once, cached) an identity kernel so the BN-only branch
            # can be expressed as a kxk convolution.
            if not hasattr(self, "id_tensor"):
                input_dim = self.in_channels // self.groups
                kernel_value = paddle.zeros(
                    (self.in_channels, input_dim, self.kernel_size, self.kernel_size),
                    dtype=branch.weight.dtype,
                )
                for i in range(self.in_channels):
                    kernel_value[
                        i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2
                    ] = 1
                self.id_tensor = kernel_value
            kernel = self.id_tensor
            running_mean = branch._mean
            running_var = branch._variance
            gamma = branch.weight
            beta = branch.bias
            eps = branch._epsilon
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape((-1, 1, 1, 1))
        return kernel * t, beta - running_mean * gamma / std
+
+
class SELayer(nn.Layer):
    """Squeeze-and-Excitation gate with per-layer learning-rate multiplier."""

    def __init__(self, channel, reduction=4, lr_mult=1.0):
        super().__init__()
        mid_channel = channel // reduction
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=mid_channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(learning_rate=lr_mult),
            bias_attr=ParamAttr(learning_rate=lr_mult),
        )
        self.relu = ReLU()
        self.conv2 = Conv2D(
            in_channels=mid_channel,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(learning_rate=lr_mult),
            bias_attr=ParamAttr(learning_rate=lr_mult),
        )
        self.hardsigmoid = Hardsigmoid()

    def forward(self, x):
        gate = self.avg_pool(x)
        gate = self.relu(self.conv1(gate))
        gate = self.hardsigmoid(self.conv2(gate))
        # Channel-wise reweighting of the block input.
        return paddle.multiply(x=x, y=gate)
+
+
class LCNetV3Block(nn.Layer):
    """Depthwise LearnableRepLayer -> optional SE -> pointwise LearnableRepLayer."""

    def __init__(
        self,
        in_channels,
        out_channels,
        stride,
        dw_size,
        use_se=False,
        conv_kxk_num=4,
        lr_mult=1.0,
        lab_lr=0.1,
    ):
        super().__init__()
        self.use_se = use_se
        # Depthwise step: one group per channel.
        self.dw_conv = LearnableRepLayer(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=dw_size,
            stride=stride,
            groups=in_channels,
            num_conv_branches=conv_kxk_num,
            lr_mult=lr_mult,
            lab_lr=lab_lr,
        )
        if use_se:
            self.se = SELayer(in_channels, lr_mult=lr_mult)
        # Pointwise step projects to the target channel count.
        self.pw_conv = LearnableRepLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            num_conv_branches=conv_kxk_num,
            lr_mult=lr_mult,
            lab_lr=lab_lr,
        )

    def forward(self, x):
        y = self.dw_conv(x)
        if self.use_se:
            y = self.se(y)
        return self.pw_conv(y)
+
+
class PPLCNetV3(nn.Layer):
    """PP-LCNetV3 backbone for text detection (det=True) or recognition.

    Args:
        scale: width multiplier applied to every stage.
        conv_kxk_num: number of kxk branches in each LearnableRepLayer.
        lr_mult_list: six learning-rate multipliers, one for conv1 and one
            per stage. The default is a tuple (not a list) to avoid the
            shared mutable-default-argument pitfall; lists are still
            accepted.
        lab_lr: learning-rate factor for the LearnableAffineBlocks.
        det: choose the detection config (and 1x1 projection heads) instead
            of the recognition config.
    """

    def __init__(
        self,
        scale=1.0,
        conv_kxk_num=4,
        lr_mult_list=(1.0, 1.0, 1.0, 1.0, 1.0, 1.0),
        lab_lr=0.1,
        det=False,
        **kwargs,
    ):
        super().__init__()
        self.scale = scale
        self.lr_mult_list = lr_mult_list
        self.det = det

        self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec

        assert isinstance(
            self.lr_mult_list, (list, tuple)
        ), "lr_mult_list should be in (list, tuple) but got {}".format(
            type(self.lr_mult_list)
        )
        assert (
            len(self.lr_mult_list) == 6
        ), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list))

        self.conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=make_divisible(16 * scale),
            kernel_size=3,
            stride=2,
            lr_mult=self.lr_mult_list[0],
        )

        # All five stages share identical construction logic; build each
        # from its config entry with its own learning-rate multiplier.
        self.blocks2 = self._make_blocks(
            "blocks2", scale, conv_kxk_num, self.lr_mult_list[1], lab_lr
        )
        self.blocks3 = self._make_blocks(
            "blocks3", scale, conv_kxk_num, self.lr_mult_list[2], lab_lr
        )
        self.blocks4 = self._make_blocks(
            "blocks4", scale, conv_kxk_num, self.lr_mult_list[3], lab_lr
        )
        self.blocks5 = self._make_blocks(
            "blocks5", scale, conv_kxk_num, self.lr_mult_list[4], lab_lr
        )
        self.blocks6 = self._make_blocks(
            "blocks6", scale, conv_kxk_num, self.lr_mult_list[5], lab_lr
        )
        self.out_channels = make_divisible(512 * scale)

        if self.det:
            # Detection mode: 1x1 convs project each stage output to the
            # channel widths expected by the detector neck.
            mv_c = [16, 24, 56, 480]
            self.out_channels = [
                make_divisible(self.net_config["blocks3"][-1][2] * scale),
                make_divisible(self.net_config["blocks4"][-1][2] * scale),
                make_divisible(self.net_config["blocks5"][-1][2] * scale),
                make_divisible(self.net_config["blocks6"][-1][2] * scale),
            ]

            self.layer_list = nn.LayerList(
                [
                    nn.Conv2D(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
                    nn.Conv2D(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
                    nn.Conv2D(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
                    nn.Conv2D(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0),
                ]
            )
            self.out_channels = [
                int(mv_c[0] * scale),
                int(mv_c[1] * scale),
                int(mv_c[2] * scale),
                int(mv_c[3] * scale),
            ]

    def _make_blocks(self, cfg_key, scale, conv_kxk_num, lr_mult, lab_lr):
        """Build one sequential stage from self.net_config[cfg_key], whose
        entries are (kernel, in_c, out_c, stride, use_se)."""
        return nn.Sequential(
            *[
                LCNetV3Block(
                    in_channels=make_divisible(in_c * scale),
                    out_channels=make_divisible(out_c * scale),
                    dw_size=k,
                    stride=s,
                    use_se=se,
                    conv_kxk_num=conv_kxk_num,
                    lr_mult=lr_mult,
                    lab_lr=lab_lr,
                )
                for k, in_c, out_c, s, se in self.net_config[cfg_key]
            ]
        )

    def forward(self, x):
        """Return [C3..C6] projected maps when det=True, else a pooled
        recognition feature map."""
        out_list = []
        x = self.conv1(x)

        x = self.blocks2(x)
        x = self.blocks3(x)
        out_list.append(x)
        x = self.blocks4(x)
        out_list.append(x)
        x = self.blocks5(x)
        out_list.append(x)
        x = self.blocks6(x)
        out_list.append(x)

        if self.det:
            out_list = [proj(feat) for proj, feat in zip(self.layer_list, out_list)]
            return out_list

        # Recognition: fixed 1x40 map during training, plain 3x2 average
        # pooling at inference.
        if self.training:
            x = F.adaptive_avg_pool2d(x, [1, 40])
        else:
            x = F.avg_pool2d(x, [3, 2])
        return x
diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/rec_resnet_fpn.py b/docling_ibm_models/slanet_1m/modeling/backbones/rec_resnet_fpn.py
new file mode 100644
index 0000000..d259f1d
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/backbones/rec_resnet_fpn.py
@@ -0,0 +1,317 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import nn, ParamAttr
+from paddle.nn import functional as F
+import paddle
+import numpy as np
+
+__all__ = ["ResNetFPN"]
+
+
class ResNetFPN(nn.Layer):
    """ResNet backbone with a lightweight FPN-style top-down merge.

    Fuses the outputs of the last three residual stages into a single
    512-channel feature map (SRN-style recognition backbone).
    """

    def __init__(self, in_channels=1, layers=50, **kwargs):
        super(ResNetFPN, self).__init__()
        # Residual units per stage for each supported depth.
        # NOTE(review): the "block_class" entries are never read below -- the
        # layers >= 50 branch hard-codes BottleneckBlock and the else branch
        # hard-codes BasicBlock.
        supported_layers = {
            18: {"depth": [2, 2, 2, 2], "block_class": BasicBlock},
            34: {"depth": [3, 4, 6, 3], "block_class": BasicBlock},
            50: {"depth": [3, 4, 6, 3], "block_class": BottleneckBlock},
            101: {"depth": [3, 4, 23, 3], "block_class": BottleneckBlock},
            152: {"depth": [3, 8, 36, 3], "block_class": BottleneckBlock},
        }
        # Only the first two stages downsample; the last two keep resolution.
        stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
        num_filters = [64, 128, 256, 512]
        self.depth = supported_layers[layers]["depth"]
        self.F = []
        # 7x7 stem convolution, stride 2.
        self.conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=64,
            kernel_size=7,
            stride=2,
            act="relu",
            name="conv1",
        )
        self.block_list = []
        in_ch = 64
        if layers >= 50:
            for block in range(len(self.depth)):
                for i in range(self.depth[block]):
                    # ResNet-101/152 name stage-3 units "a", "b1", "b2", ...;
                    # other depths use "a", "b", "c", ...
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    block_list = self.add_sublayer(
                        "bottleneckBlock_{}_{}".format(block, i),
                        BottleneckBlock(
                            in_channels=in_ch,
                            out_channels=num_filters[block],
                            stride=stride_list[block] if i == 0 else 1,
                            name=conv_name,
                        ),
                    )
                    in_ch = num_filters[block] * 4
                    self.block_list.append(block_list)
                    self.F.append(block_list)
        else:
            for block in range(len(self.depth)):
                for i in range(self.depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    # NOTE(review): `stride` computed here is never used; the
                    # BasicBlock below takes stride_list instead -- confirm
                    # which was intended.
                    if i == 0 and block != 0:
                        stride = (2, 1)
                    else:
                        stride = (1, 1)
                    basic_block = self.add_sublayer(
                        conv_name,
                        BasicBlock(
                            in_channels=in_ch,
                            out_channels=num_filters[block],
                            stride=stride_list[block] if i == 0 else 1,
                            is_first=block == i == 0,
                            name=conv_name,
                        ),
                    )
                    in_ch = basic_block.out_channels
                    self.block_list.append(basic_block)
        # Channel counts of the last three stage outputs (shallow -> deep).
        out_ch_list = [in_ch // 4, in_ch // 2, in_ch]
        self.base_block = []
        # NOTE(review): conv_trans / bn_block stay empty but are indexed in
        # __call__ when feature shapes disagree -- that branch would raise
        # IndexError; presumably shapes always match given the strides above.
        self.conv_trans = []
        self.bn_block = []
        for i in [-2, -3]:
            in_channels = out_ch_list[i + 1] + out_ch_list[i]

            # Per merge step: 1x1 fuse of concatenated features, 3x3 conv,
            # BN+ReLU, then a final 1x1 projection to 512 channels.
            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_0".format(i),
                    nn.Conv2D(
                        in_channels=in_channels,
                        out_channels=out_ch_list[i],
                        kernel_size=1,
                        weight_attr=ParamAttr(trainable=True),
                        bias_attr=ParamAttr(trainable=True),
                    ),
                )
            )
            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_1".format(i),
                    nn.Conv2D(
                        in_channels=out_ch_list[i],
                        out_channels=out_ch_list[i],
                        kernel_size=3,
                        padding=1,
                        weight_attr=ParamAttr(trainable=True),
                        bias_attr=ParamAttr(trainable=True),
                    ),
                )
            )
            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_2".format(i),
                    nn.BatchNorm(
                        num_channels=out_ch_list[i],
                        act="relu",
                        param_attr=ParamAttr(trainable=True),
                        bias_attr=ParamAttr(trainable=True),
                    ),
                )
            )
            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_3".format(i),
                    nn.Conv2D(
                        in_channels=out_ch_list[i],
                        out_channels=512,
                        kernel_size=1,
                        bias_attr=ParamAttr(trainable=True),
                        weight_attr=ParamAttr(trainable=True),
                    ),
                )
            )
        self.out_channels = 512

    def __call__(self, x):
        x = self.conv(x)
        fpn_list = []
        F = []
        # Cumulative unit counts mark the last unit index of each stage.
        for i in range(len(self.depth)):
            fpn_list.append(np.sum(self.depth[: i + 1]))

        for i, block in enumerate(self.block_list):
            x = block(x)
            for number in fpn_list:
                if i + 1 == number:
                    F.append(x)  # tap each stage's final output
        base = F[-1]

        j = 0
        for i, block in enumerate(self.base_block):
            if i % 3 == 0 and i < 6:
                j = j + 1
                # NOTE(review): names `w, h` are bound in NCHW order, so they
                # actually hold (H, W); harmless, only used in the comparison.
                b, c, w, h = F[-j - 1].shape
                if [w, h] == list(base.shape[2:]):
                    base = base
                else:
                    # NOTE(review): conv_trans/bn_block are empty (see
                    # __init__); this branch would fail if ever taken.
                    base = self.conv_trans[j - 1](base)
                    base = self.bn_block[j - 1](base)
                base = paddle.concat([base, F[-j - 1]], axis=1)
            base = block(base)
        return base
+
+
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm building block for ResNetFPN.

    When ``stride == (1, 1)`` the convolution is replaced by a dilated 2x2
    kernel (dilation 2), preserving receptive field without downsampling.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: nominal kernel size (also used to derive padding).
        stride: conv stride; the tuple ``(1, 1)`` triggers the dilated variant.
        groups: conv groups.
        act: activation name forwarded to BatchNorm (e.g. "relu"), or None.
        name: parameter-name prefix; required in practice (used to build
            weight/BN parameter names below).
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        groups=1,
        act=None,
        name=None,
    ):
        super(ConvBNLayer, self).__init__()
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=2 if stride == (1, 1) else kernel_size,
            dilation=2 if stride == (1, 1) else 1,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"),
            bias_attr=False,
        )

        # BN parameter naming mirrors the classic ResNet checkpoint layout.
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=name + ".output.1.w_0"),
            bias_attr=ParamAttr(name=name + ".output.1.b_0"),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance",
        )

    def forward(self, x):
        # Fix: defined as forward() (the original overrode __call__ directly,
        # bypassing nn.Layer's hook dispatch); call sites are unchanged since
        # nn.Layer.__call__ delegates here.
        x = self.conv(x)
        x = self.bn(x)
        return x
+
+
class ShortCut(nn.Layer):
    """Identity / projection shortcut for the residual blocks.

    A 1x1 ConvBNLayer projection is used when the channel count changes,
    the stride is not 1, or ``is_first`` is set; otherwise the input passes
    through unchanged.
    """

    def __init__(self, in_channels, out_channels, stride, name, is_first=False):
        super(ShortCut, self).__init__()
        self.use_conv = True

        # Fix: `is_first` instead of the non-idiomatic `is_first == True`.
        if in_channels != out_channels or stride != 1 or is_first:
            if stride == (1, 1):
                self.conv = ConvBNLayer(in_channels, out_channels, 1, 1, name=name)
            else:  # stride == (2, 2)
                self.conv = ConvBNLayer(in_channels, out_channels, 1, stride, name=name)
        else:
            self.use_conv = False

    def forward(self, x):
        if self.use_conv:
            x = self.conv(x)
        return x
+
+
class BottleneckBlock(nn.Layer):
    """Standard ResNet bottleneck: 1x1 reduce -> 3x3 -> 1x1 expand (x4
    channels), plus a (possibly projected) shortcut and a final ReLU."""

    def __init__(self, in_channels, out_channels, stride, name):
        super(BottleneckBlock, self).__init__()
        branch = name + "_branch2"
        # 1x1 channel reduction.
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act="relu",
            name=branch + "a",
        )
        # 3x3 spatial convolution carries the stride.
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act="relu",
            name=branch + "b",
        )
        # 1x1 expansion to 4x channels; linear, activation comes after the add.
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
            kernel_size=1,
            act=None,
            name=branch + "c",
        )
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels * 4,
            stride=stride,
            is_first=False,
            name=name + "_branch1",
        )
        self.out_channels = out_channels * 4

    def forward(self, x):
        residual = self.short(x)
        out = self.conv2(self.conv1(self.conv0(x)))
        return F.relu(out + residual)
+
+
class BasicBlock(nn.Layer):
    """Two-conv ResNet basic block with shortcut and a final ReLU."""

    def __init__(self, in_channels, out_channels, stride, name, is_first):
        super(BasicBlock, self).__init__()
        # First 3x3 conv carries the stride.
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            act="relu",
            stride=stride,
            name=name + "_branch2a",
        )
        # Second 3x3 conv is linear; activation is applied after the add.
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b",
        )
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels,
            stride=stride,
            is_first=is_first,
            name=name + "_branch1",
        )
        self.out_channels = out_channels

    def forward(self, x):
        residual = self.short(x)
        out = self.conv1(self.conv0(x))
        return F.relu(out + residual)
diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/rec_svtrnet.py b/docling_ibm_models/slanet_1m/modeling/backbones/rec_svtrnet.py
new file mode 100644
index 0000000..427c87b
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/backbones/rec_svtrnet.py
@@ -0,0 +1,642 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle import ParamAttr
+from paddle.nn.initializer import KaimingNormal
+import numpy as np
+import paddle
+import paddle.nn as nn
+from paddle.nn.initializer import TruncatedNormal, Constant, Normal
+
+trunc_normal_ = TruncatedNormal(std=0.02)
+normal_ = Normal
+zeros_ = Constant(value=0.0)
+ones_ = Constant(value=1.0)
+
+
def drop_path(x, drop_prob=0.0, training=False):
    """Drop paths (Stochastic Depth) per sample, applied in the main path of
    residual blocks.

    The original name is misleading as 'Drop Connect' is a different form of
    dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
    """
    # Identity at inference time or when the drop probability is zero.
    if not training or drop_prob == 0.0:
        return x
    keep_prob = paddle.to_tensor(1 - drop_prob, dtype=x.dtype)
    # One Bernoulli draw per sample, broadcast over the remaining dims.
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    binary_mask = paddle.floor(keep_prob + paddle.rand(mask_shape, dtype=x.dtype))
    # Scale surviving paths by 1/keep_prob so the expectation is unchanged.
    return x.divide(keep_prob) * binary_mask
+
+
class ConvBNLayer(nn.Layer):
    """Conv2D -> BatchNorm2D -> activation block used throughout SVTR."""

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=0,
        bias_attr=False,
        groups=1,
        act=nn.GELU,
    ):
        super().__init__()
        # Kaiming-uniform initialised convolution.
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=paddle.ParamAttr(initializer=nn.initializer.KaimingUniform()),
            bias_attr=bias_attr,
        )
        self.norm = nn.BatchNorm2D(out_channels)
        self.act = act()

    def forward(self, inputs):
        return self.act(self.norm(self.conv(inputs)))
+
+
class DropPath(nn.Layer):
    """Per-sample stochastic depth; thin module wrapper around drop_path()."""

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # Only active while the layer is in training mode.
        return drop_path(x, drop_prob=self.drop_prob, training=self.training)
+
+
class Identity(nn.Layer):
    """No-op layer: returns its input unchanged (used in place of DropPath
    when the drop probability is zero)."""

    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, input):
        return input
+
+
class Mlp(nn.Layer):
    """Two-layer feed-forward block (Linear -> act -> drop -> Linear -> drop)
    used inside the transformer blocks."""

    def __init__(
        self,
        in_features,
        hidden_features=None,
        out_features=None,
        act_layer=nn.GELU,
        drop=0.0,
    ):
        super().__init__()
        # Hidden/output widths default to the input width.
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
+
+
class ConvMixer(nn.Layer):
    """Convolutional token mixer: reshapes the token sequence back to a 2-D
    grid and applies a grouped local convolution.

    Args:
        dim: embedding dimension (== conv channels).
        num_heads: number of convolution groups.
        HW: (height, width) of the token grid.
        local_k: local convolution kernel size.
    """

    def __init__(
        self,
        dim,
        num_heads=8,
        HW=(8, 25),
        local_k=(3, 3),
    ):
        super().__init__()
        # Fix: defaults are tuples (the originals were mutable lists shared
        # across calls); they are read-only here so behavior is unchanged.
        self.HW = HW
        self.dim = dim
        self.local_mixer = nn.Conv2D(
            dim,
            dim,
            local_k,
            1,
            [local_k[0] // 2, local_k[1] // 2],
            groups=num_heads,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
        )

    def forward(self, x):
        h = self.HW[0]
        w = self.HW[1]
        # (B, N, C) -> (B, C, H, W); the leading 0 keeps the batch dim (paddle
        # reshape convention).
        x = x.transpose([0, 2, 1]).reshape([0, self.dim, h, w])
        x = self.local_mixer(x)
        # Back to a token sequence: (B, C, H, W) -> (B, N, C).
        x = x.flatten(2).transpose([0, 2, 1])
        return x
+
+
class Attention(nn.Layer):
    """Multi-head self-attention with optional local windowing.

    ``mixer="Global"`` is ordinary full attention. ``mixer="Local"`` adds a
    -inf mask so each token only attends to a local_k[0] x local_k[1]
    neighbourhood on the HW token grid (requires ``HW``).
    """

    def __init__(
        self,
        dim,
        num_heads=8,
        mixer="Global",
        HW=None,
        local_k=[7, 11],
        qkv_bias=False,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
    ):
        super().__init__()
        self.num_heads = num_heads
        self.dim = dim
        self.head_dim = dim // num_heads
        self.scale = qk_scale or self.head_dim**-0.5

        self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)
        self.HW = HW
        if HW is not None:
            H = HW[0]
            W = HW[1]
            self.N = H * W
            self.C = dim
        if mixer == "Local" and HW is not None:
            hk = local_k[0]
            wk = local_k[1]
            # Build the mask on a padded grid: 0 inside each token's hk x wk
            # window, 1 elsewhere.
            mask = paddle.ones([H * W, H + hk - 1, W + wk - 1], dtype="float32")
            for h in range(0, H):
                for w in range(0, W):
                    mask[h * W + w, h : h + hk, w : w + wk] = 0.0
            # Crop the padding back to the H x W grid and flatten to (N, N).
            mask_paddle = mask[:, hk // 2 : H + hk // 2, wk // 2 : W + wk // 2].flatten(
                1
            )
            mask_inf = paddle.full([H * W, H * W], "-inf", dtype="float32")
            # Inside the window -> 0 (kept); outside -> -inf (masked out).
            mask = paddle.where(mask_paddle < 1, mask_paddle, mask_inf)
            # Broadcastable over (batch, heads) when added to attn scores.
            self.mask = mask.unsqueeze([0, 1])
        self.mixer = mixer

    def forward(self, x):
        # (B, N, 3C) -> (3, B, heads, N, head_dim); leading 0 keeps batch dim.
        qkv = (
            self.qkv(x)
            .reshape((0, -1, 3, self.num_heads, self.head_dim))
            .transpose((2, 0, 3, 1, 4))
        )
        q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]

        attn = q.matmul(k.transpose((0, 1, 3, 2)))
        if self.mixer == "Local":
            # -inf outside each token's local window; softmax zeroes those.
            attn += self.mask
        attn = nn.functional.softmax(attn, axis=-1)
        attn = self.attn_drop(attn)

        # Merge heads back: (B, heads, N, head_dim) -> (B, N, C).
        x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((0, -1, self.dim))
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
+
+
class Block(nn.Layer):
    """Transformer block: a token mixer (attention or conv) and an MLP, each
    with residual connection, drop-path, and a norm layer.

    NOTE(review): with ``prenorm=True`` the norms are applied AFTER each
    residual add (a post-norm layout) -- the flag name is inverted relative
    to the usual convention; kept as-is for config compatibility.
    """

    def __init__(
        self,
        dim,
        num_heads,
        mixer="Global",
        local_mixer=[7, 11],
        HW=None,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer="nn.LayerNorm",
        epsilon=1e-6,
        prenorm=True,
    ):
        super().__init__()
        # Norm layers may be given as a string (e.g. "nn.LayerNorm") and are
        # eval'd -- only trusted config values should reach here.
        if isinstance(norm_layer, str):
            self.norm1 = eval(norm_layer)(dim, epsilon=epsilon)
        else:
            self.norm1 = norm_layer(dim)
        if mixer == "Global" or mixer == "Local":
            self.mixer = Attention(
                dim,
                num_heads=num_heads,
                mixer=mixer,
                HW=HW,
                local_k=local_mixer,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                attn_drop=attn_drop,
                proj_drop=drop,
            )
        elif mixer == "Conv":
            self.mixer = ConvMixer(dim, num_heads=num_heads, HW=HW, local_k=local_mixer)
        else:
            raise TypeError("The mixer must be one of [Global, Local, Conv]")

        # Identity when drop_path is 0 so inference graphs stay clean.
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()
        if isinstance(norm_layer, str):
            self.norm2 = eval(norm_layer)(dim, epsilon=epsilon)
        else:
            self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp_ratio = mlp_ratio
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
        self.prenorm = prenorm

    def forward(self, x):
        if self.prenorm:
            # norm applied after each residual add (see class note).
            x = self.norm1(x + self.drop_path(self.mixer(x)))
            x = self.norm2(x + self.drop_path(self.mlp(x)))
        else:
            # conventional pre-norm residual layout.
            x = x + self.drop_path(self.mixer(self.norm1(x)))
            x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x
+
+
class PatchEmbed(nn.Layer):
    """Image to Patch Embedding.

    ``mode="pope"``: an overlapping conv stem of ``sub_num`` stride-2
    ConvBNLayers (sub_num must be 2 or 3; other values leave ``proj`` unset).
    ``mode="linear"``: a single strided conv over ``patch_size`` patches.
    """

    def __init__(
        self,
        img_size=[32, 100],
        in_channels=3,
        embed_dim=768,
        sub_num=2,
        patch_size=[4, 4],
        mode="pope",
    ):
        super().__init__()
        # Token count after sub_num halvings of each spatial dim.
        num_patches = (img_size[1] // (2**sub_num)) * (img_size[0] // (2**sub_num))
        self.img_size = img_size
        self.num_patches = num_patches
        self.embed_dim = embed_dim
        self.norm = None
        if mode == "pope":
            if sub_num == 2:
                self.proj = nn.Sequential(
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=embed_dim // 2,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        act=nn.GELU,
                        bias_attr=None,
                    ),
                    ConvBNLayer(
                        in_channels=embed_dim // 2,
                        out_channels=embed_dim,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        act=nn.GELU,
                        bias_attr=None,
                    ),
                )
            if sub_num == 3:
                self.proj = nn.Sequential(
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=embed_dim // 4,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        act=nn.GELU,
                        bias_attr=None,
                    ),
                    ConvBNLayer(
                        in_channels=embed_dim // 4,
                        out_channels=embed_dim // 2,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        act=nn.GELU,
                        bias_attr=None,
                    ),
                    ConvBNLayer(
                        in_channels=embed_dim // 2,
                        out_channels=embed_dim,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        act=nn.GELU,
                        bias_attr=None,
                    ),
                )
        elif mode == "linear":
            # Fix: respect `in_channels` (it was hard-coded to 1, which breaks
            # any multi-channel input -- including the 3-channel default --
            # in linear mode).
            self.proj = nn.Conv2D(
                in_channels, embed_dim, kernel_size=patch_size, stride=patch_size
            )
            self.num_patches = (
                img_size[0] // patch_size[0] * img_size[1] // patch_size[1]
            )

    def forward(self, x):
        B, C, H, W = x.shape
        assert (
            H == self.img_size[0] and W == self.img_size[1]
        ), f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        # (B, C', H', W') -> (B, N, C') token sequence.
        x = self.proj(x).flatten(2).transpose((0, 2, 1))
        return x
+
+
class SubSample(nn.Layer):
    """Downsampling between SVTR stages, either pooling- or conv-based,
    followed by a norm layer and an optional activation."""

    def __init__(
        self,
        in_channels,
        out_channels,
        types="Pool",
        stride=[2, 1],
        sub_norm="nn.LayerNorm",
        act=None,
    ):
        super().__init__()
        self.types = types
        if types == "Pool":
            # Average and max pooling are blended 50/50, then projected.
            self.avgpool = nn.AvgPool2D(
                kernel_size=[3, 5], stride=stride, padding=[1, 2]
            )
            self.maxpool = nn.MaxPool2D(
                kernel_size=[3, 5], stride=stride, padding=[1, 2]
            )
            self.proj = nn.Linear(in_channels, out_channels)
        else:
            # Conv-based downsampling (e.g. types == "Conv").
            self.conv = nn.Conv2D(
                in_channels,
                out_channels,
                kernel_size=3,
                stride=stride,
                padding=1,
                weight_attr=ParamAttr(initializer=KaimingNormal()),
            )
        # Norm class is given by name (e.g. "nn.LayerNorm").
        self.norm = eval(sub_norm)(out_channels)
        self.act = act() if act is not None else None

    def forward(self, x):
        if self.types == "Pool":
            pooled = (self.avgpool(x) + self.maxpool(x)) * 0.5
            out = self.proj(pooled.flatten(2).transpose((0, 2, 1)))
        else:
            out = self.conv(x).flatten(2).transpose((0, 2, 1))
        out = self.norm(out)
        if self.act is not None:
            out = self.act(out)

        return out
+
+
class SVTRNet(nn.Layer):
    """SVTR backbone: patch embedding followed by three stages of mixing
    blocks with optional inter-stage subsampling, and an optional pooled
    "last stage" projection for recognition.
    """

    def __init__(
        self,
        img_size=[32, 100],
        in_channels=3,
        embed_dim=[64, 128, 256],
        depth=[3, 6, 3],
        num_heads=[2, 4, 8],
        mixer=["Local"] * 6 + ["Global"] * 6,  # Local atten, Global atten, Conv
        local_mixer=[[7, 11], [7, 11], [7, 11]],
        patch_merging="Conv",  # Conv, Pool, None
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.0,
        last_drop=0.1,
        attn_drop_rate=0.0,
        drop_path_rate=0.1,
        norm_layer="nn.LayerNorm",
        sub_norm="nn.LayerNorm",
        epsilon=1e-6,
        out_channels=192,
        out_char_num=25,
        block_unit="Block",
        act="nn.GELU",
        last_stage=True,
        sub_num=2,
        prenorm=True,
        use_lenhead=False,
        **kwargs,
    ):
        super().__init__()
        self.img_size = img_size
        self.embed_dim = embed_dim
        self.out_channels = out_channels
        self.prenorm = prenorm
        # Any value other than "Conv"/"Pool" disables patch merging.
        patch_merging = (
            None
            if patch_merging != "Conv" and patch_merging != "Pool"
            else patch_merging
        )
        self.patch_embed = PatchEmbed(
            img_size=img_size,
            in_channels=in_channels,
            embed_dim=embed_dim[0],
            sub_num=sub_num,
        )
        num_patches = self.patch_embed.num_patches
        # Token grid size after the patch-embed stem.
        self.HW = [img_size[0] // (2**sub_num), img_size[1] // (2**sub_num)]
        # Learnable absolute position embedding, zero-initialised.
        self.pos_embed = self.create_parameter(
            shape=[1, num_patches, embed_dim[0]], default_initializer=zeros_
        )
        self.add_parameter("pos_embed", self.pos_embed)
        self.pos_drop = nn.Dropout(p=drop_rate)
        # Block class resolved by name (trusted config only).
        Block_unit = eval(block_unit)

        # Stochastic-depth rates increase linearly across all blocks.
        dpr = np.linspace(0, drop_path_rate, sum(depth))
        self.blocks1 = nn.LayerList(
            [
                Block_unit(
                    dim=embed_dim[0],
                    num_heads=num_heads[0],
                    mixer=mixer[0 : depth[0]][i],
                    HW=self.HW,
                    local_mixer=local_mixer[0],
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=drop_rate,
                    act_layer=eval(act),
                    attn_drop=attn_drop_rate,
                    drop_path=dpr[0 : depth[0]][i],
                    norm_layer=norm_layer,
                    epsilon=epsilon,
                    prenorm=prenorm,
                )
                for i in range(depth[0])
            ]
        )
        if patch_merging is not None:
            # Halve the height between stages 1 and 2.
            self.sub_sample1 = SubSample(
                embed_dim[0],
                embed_dim[1],
                sub_norm=sub_norm,
                stride=[2, 1],
                types=patch_merging,
            )
            HW = [self.HW[0] // 2, self.HW[1]]
        else:
            HW = self.HW
        self.patch_merging = patch_merging
        self.blocks2 = nn.LayerList(
            [
                Block_unit(
                    dim=embed_dim[1],
                    num_heads=num_heads[1],
                    mixer=mixer[depth[0] : depth[0] + depth[1]][i],
                    HW=HW,
                    local_mixer=local_mixer[1],
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=drop_rate,
                    act_layer=eval(act),
                    attn_drop=attn_drop_rate,
                    drop_path=dpr[depth[0] : depth[0] + depth[1]][i],
                    norm_layer=norm_layer,
                    epsilon=epsilon,
                    prenorm=prenorm,
                )
                for i in range(depth[1])
            ]
        )
        if patch_merging is not None:
            # Halve the height again between stages 2 and 3.
            self.sub_sample2 = SubSample(
                embed_dim[1],
                embed_dim[2],
                sub_norm=sub_norm,
                stride=[2, 1],
                types=patch_merging,
            )
            HW = [self.HW[0] // 4, self.HW[1]]
        else:
            HW = self.HW
        self.blocks3 = nn.LayerList(
            [
                Block_unit(
                    dim=embed_dim[2],
                    num_heads=num_heads[2],
                    mixer=mixer[depth[0] + depth[1] :][i],
                    HW=HW,
                    local_mixer=local_mixer[2],
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=drop_rate,
                    act_layer=eval(act),
                    attn_drop=attn_drop_rate,
                    drop_path=dpr[depth[0] + depth[1] :][i],
                    norm_layer=norm_layer,
                    epsilon=epsilon,
                    prenorm=prenorm,
                )
                for i in range(depth[2])
            ]
        )
        self.last_stage = last_stage
        if last_stage:
            # Pool height to 1 and width to out_char_num, then project
            # channels with a 1x1 conv.
            self.avg_pool = nn.AdaptiveAvgPool2D([1, out_char_num])
            self.last_conv = nn.Conv2D(
                in_channels=embed_dim[2],
                out_channels=self.out_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                bias_attr=False,
            )
            self.hardswish = nn.Hardswish()
            self.dropout = nn.Dropout(p=last_drop, mode="downscale_in_infer")
        if not prenorm:
            self.norm = eval(norm_layer)(embed_dim[-1], epsilon=epsilon)
        self.use_lenhead = use_lenhead
        if use_lenhead:
            # Auxiliary length-prediction branch over mean-pooled tokens.
            self.len_conv = nn.Linear(embed_dim[2], self.out_channels)
            self.hardswish_len = nn.Hardswish()
            self.dropout_len = nn.Dropout(p=last_drop, mode="downscale_in_infer")

        trunc_normal_(self.pos_embed)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Truncated-normal for Linear weights, zeros for biases,
        # ones/zeros for LayerNorm scale/shift.
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
        if isinstance(m, nn.Linear) and m.bias is not None:
            zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward_features(self, x):
        x = self.patch_embed(x)
        x = x + self.pos_embed
        x = self.pos_drop(x)
        for blk in self.blocks1:
            x = blk(x)
        if self.patch_merging is not None:
            # Tokens -> 2-D grid (leading 0 keeps batch dim) -> subsample.
            x = self.sub_sample1(
                x.transpose([0, 2, 1]).reshape(
                    [0, self.embed_dim[0], self.HW[0], self.HW[1]]
                )
            )
        for blk in self.blocks2:
            x = blk(x)
        if self.patch_merging is not None:
            x = self.sub_sample2(
                x.transpose([0, 2, 1]).reshape(
                    [0, self.embed_dim[1], self.HW[0] // 2, self.HW[1]]
                )
            )
        for blk in self.blocks3:
            x = blk(x)
        if not self.prenorm:
            x = self.norm(x)
        return x

    def forward(self, x):
        x = self.forward_features(x)
        if self.use_lenhead:
            len_x = self.len_conv(x.mean(1))
            len_x = self.dropout_len(self.hardswish_len(len_x))
        if self.last_stage:
            # Height after 0 or 2 subsampling steps.
            if self.patch_merging is not None:
                h = self.HW[0] // 4
            else:
                h = self.HW[0]
            x = self.avg_pool(
                x.transpose([0, 2, 1]).reshape([0, self.embed_dim[2], h, self.HW[1]])
            )
            x = self.last_conv(x)
            x = self.hardswish(x)
            x = self.dropout(x)
        if self.use_lenhead:
            return x, len_x
        return x
diff --git a/docling_ibm_models/slanet_1m/modeling/heads/__init__.py b/docling_ibm_models/slanet_1m/modeling/heads/__init__.py
new file mode 100644
index 0000000..829728f
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/heads/__init__.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ["build_head"]
+
+
def build_head(config):
    """Build a head module from a config dict.

    Args:
        config: dict with a "name" key naming the head class (consumed
            here); all remaining keys are passed to the head's constructor.

    Returns:
        The instantiated head module.

    Raises:
        Exception: if the requested head is not supported.
    """
    # Only the table head is shipped in this trimmed package. The unused
    # rec-head imports (CTCHead, AttentionHead, Transformer, MultiHead) are
    # dropped: they were never in support_dict, and a missing module would
    # make this import crash.
    from .table_att_head import SLAHead

    # Explicit dispatch table instead of eval() on the config string.
    support_dict = {
        "SLAHead": SLAHead,
    }

    module_name = config.pop("name")
    if module_name not in support_dict:
        # raise (not assert): validation must survive `python -O`.
        raise Exception("head only support {}".format(list(support_dict)))
    module_class = support_dict[module_name](**config)
    return module_class
diff --git a/docling_ibm_models/slanet_1m/modeling/heads/rec_att_head.py b/docling_ibm_models/slanet_1m/modeling/heads/rec_att_head.py
new file mode 100644
index 0000000..2c952ce
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/heads/rec_att_head.py
@@ -0,0 +1,215 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+import numpy as np
+
+
class AttentionHead(nn.Layer):
    """Attention-based recognition head with a GRU decoder.

    With ``targets`` given (training) the decoder is teacher-forced on the
    target characters and returns raw logits. Without targets (inference)
    it decodes greedily for ``batch_max_length`` steps starting from token
    id 0 and applies softmax when not in training mode.
    """

    def __init__(self, in_channels, out_channels, hidden_size, **kwargs):
        super(AttentionHead, self).__init__()
        self.input_size = in_channels
        self.hidden_size = hidden_size
        self.num_classes = out_channels

        self.attention_cell = AttentionGRUCell(
            in_channels, hidden_size, out_channels, use_gru=False
        )
        self.generator = nn.Linear(hidden_size, out_channels)

    def _char_to_onehot(self, input_char, onehot_dim):
        # One-hot encode previous-character ids for the decoder input.
        input_ont_hot = F.one_hot(input_char, onehot_dim)
        return input_ont_hot

    def forward(self, inputs, targets=None, batch_max_length=25):
        batch_size = inputs.shape[0]
        num_steps = batch_max_length

        # Decoder state starts at zero.
        hidden = paddle.zeros((batch_size, self.hidden_size))
        output_hiddens = []

        if targets is not None:
            # Teacher forcing: feed the ground-truth character at each step.
            for i in range(num_steps):
                char_onehots = self._char_to_onehot(
                    targets[:, i], onehot_dim=self.num_classes
                )
                (outputs, hidden), alpha = self.attention_cell(
                    hidden, inputs, char_onehots
                )
                output_hiddens.append(paddle.unsqueeze(outputs, axis=1))
            output = paddle.concat(output_hiddens, axis=1)
            probs = self.generator(output)
        else:
            # Greedy decoding from token id 0 (presumably the start/blank
            # token -- confirm against the label encoding).
            targets = paddle.zeros(shape=[batch_size], dtype="int32")
            probs = None
            char_onehots = None
            outputs = None
            alpha = None

            for i in range(num_steps):
                char_onehots = self._char_to_onehot(
                    targets, onehot_dim=self.num_classes
                )
                (outputs, hidden), alpha = self.attention_cell(
                    hidden, inputs, char_onehots
                )
                probs_step = self.generator(outputs)
                if probs is None:
                    probs = paddle.unsqueeze(probs_step, axis=1)
                else:
                    probs = paddle.concat(
                        [probs, paddle.unsqueeze(probs_step, axis=1)], axis=1
                    )
                # Feed the argmax prediction back as the next input.
                next_input = probs_step.argmax(axis=1)
                targets = next_input
        if not self.training:
            probs = paddle.nn.functional.softmax(probs, axis=2)
        return probs
+
+
class AttentionGRUCell(nn.Layer):
    """One step of additive (Bahdanau-style) attention over the encoder
    features followed by a GRU cell update.

    Note: ``use_gru`` is accepted for signature compatibility but a GRUCell
    is always used here.
    """

    def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
        super(AttentionGRUCell, self).__init__()
        self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.score = nn.Linear(hidden_size, 1, bias_attr=False)
        self.rnn = nn.GRUCell(
            input_size=input_size + num_embeddings, hidden_size=hidden_size
        )
        self.hidden_size = hidden_size

    def forward(self, prev_hidden, batch_H, char_onehots):
        # Additive attention energies: score(tanh(W_h * H + W_s * s_{t-1})).
        projected_features = self.i2h(batch_H)
        projected_state = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)
        energies = self.score(
            paddle.tanh(paddle.add(projected_features, projected_state))
        )

        # Normalise over the time axis and pool the encoder features.
        alpha = paddle.transpose(F.softmax(energies, axis=1), [0, 2, 1])
        context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)

        # Feed [context; one-hot(previous char)] into the GRU cell.
        rnn_input = paddle.concat([context, char_onehots], 1)
        cur_hidden = self.rnn(rnn_input, prev_hidden)

        return cur_hidden, alpha
+
+
class AttentionLSTM(nn.Layer):
    """Attention-based recognition head with an LSTM decoder.

    Mirrors AttentionHead but keeps an (h, c) state tuple. Teacher-forced
    when ``targets`` is given; greedy decoding otherwise.
    """

    def __init__(self, in_channels, out_channels, hidden_size, **kwargs):
        super(AttentionLSTM, self).__init__()
        self.input_size = in_channels
        self.hidden_size = hidden_size
        self.num_classes = out_channels

        self.attention_cell = AttentionLSTMCell(
            in_channels, hidden_size, out_channels, use_gru=False
        )
        self.generator = nn.Linear(hidden_size, out_channels)

    def _char_to_onehot(self, input_char, onehot_dim):
        # One-hot encode previous-character ids for the decoder input.
        input_ont_hot = F.one_hot(input_char, onehot_dim)
        return input_ont_hot

    def forward(self, inputs, targets=None, batch_max_length=25):
        batch_size = inputs.shape[0]
        num_steps = batch_max_length

        # Initial (h, c) LSTM state, both zero.
        hidden = (
            paddle.zeros((batch_size, self.hidden_size)),
            paddle.zeros((batch_size, self.hidden_size)),
        )
        output_hiddens = []

        if targets is not None:
            for i in range(num_steps):
                # one-hot vectors for a i-th char
                char_onehots = self._char_to_onehot(
                    targets[:, i], onehot_dim=self.num_classes
                )
                hidden, alpha = self.attention_cell(hidden, inputs, char_onehots)

                # The cell returns the LSTMCell output pair; keep only the
                # (h, c) tuple as the next state, and h as the step output.
                hidden = (hidden[1][0], hidden[1][1])
                output_hiddens.append(paddle.unsqueeze(hidden[0], axis=1))
            output = paddle.concat(output_hiddens, axis=1)
            probs = self.generator(output)

        else:
            # Greedy decoding from token id 0.
            targets = paddle.zeros(shape=[batch_size], dtype="int32")
            probs = None
            char_onehots = None
            alpha = None

            for i in range(num_steps):
                char_onehots = self._char_to_onehot(
                    targets, onehot_dim=self.num_classes
                )
                hidden, alpha = self.attention_cell(hidden, inputs, char_onehots)
                probs_step = self.generator(hidden[0])
                hidden = (hidden[1][0], hidden[1][1])
                if probs is None:
                    probs = paddle.unsqueeze(probs_step, axis=1)
                else:
                    probs = paddle.concat(
                        [probs, paddle.unsqueeze(probs_step, axis=1)], axis=1
                    )

                # Feed the argmax prediction back as the next input.
                next_input = probs_step.argmax(axis=1)

                targets = next_input
        if not self.training:
            probs = paddle.nn.functional.softmax(probs, axis=2)
        return probs
+
+
class AttentionLSTMCell(nn.Layer):
    """One step of additive attention over the encoder features followed by
    an LSTM (or GRU, when ``use_gru``) cell update.

    ``prev_hidden`` is the recurrent state tuple; its first element is the
    hidden vector used for the attention query.
    """

    def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
        super(AttentionLSTMCell, self).__init__()
        self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.score = nn.Linear(hidden_size, 1, bias_attr=False)
        cell_cls = nn.GRUCell if use_gru else nn.LSTMCell
        self.rnn = cell_cls(
            input_size=input_size + num_embeddings, hidden_size=hidden_size
        )
        self.hidden_size = hidden_size

    def forward(self, prev_hidden, batch_H, char_onehots):
        # Additive attention energies: score(tanh(W_h * H + W_s * h_{t-1})).
        projected_features = self.i2h(batch_H)
        projected_state = paddle.unsqueeze(self.h2h(prev_hidden[0]), axis=1)
        energies = self.score(
            paddle.tanh(paddle.add(projected_features, projected_state))
        )

        # Normalise over the time axis and pool the encoder features.
        alpha = paddle.transpose(F.softmax(energies, axis=1), [0, 2, 1])
        context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)

        # Feed [context; one-hot(previous char)] into the recurrent cell.
        rnn_input = paddle.concat([context, char_onehots], 1)
        cur_hidden = self.rnn(rnn_input, prev_hidden)

        return cur_hidden, alpha
diff --git a/docling_ibm_models/slanet_1m/modeling/heads/rec_ctc_head.py b/docling_ibm_models/slanet_1m/modeling/heads/rec_ctc_head.py
new file mode 100644
index 0000000..5e19a9a
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/heads/rec_ctc_head.py
@@ -0,0 +1,92 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle
+from paddle import ParamAttr, nn
+from paddle.nn import functional as F
+
+
def get_para_bias_attr(l2_decay, k):
    """Build a matching [weight_attr, bias_attr] pair sharing L2 decay and
    a uniform init in [-1/sqrt(k), 1/sqrt(k)] (fan-in scaling).

    Args:
        l2_decay: L2 regularization coefficient.
        k: fan-in used to derive the uniform init bound.
    """
    bound = 1.0 / math.sqrt(k * 1.0)
    init = nn.initializer.Uniform(-bound, bound)
    reg = paddle.regularizer.L2Decay(l2_decay)
    return [
        ParamAttr(regularizer=reg, initializer=init),
        ParamAttr(regularizer=reg, initializer=init),
    ]
+
+
class CTCHead(nn.Layer):
    """CTC classification head: a single FC layer, or FC -> FC when
    ``mid_channels`` is given. In eval mode the logits are softmaxed
    and returned alone (features are dropped on purpose).
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        fc_decay=0.0004,
        mid_channels=None,
        return_feats=False,
        **kwargs,
    ):
        super(CTCHead, self).__init__()
        if mid_channels is None:
            # Single projection straight to the vocabulary size.
            w_attr, b_attr = get_para_bias_attr(l2_decay=fc_decay, k=in_channels)
            self.fc = nn.Linear(
                in_channels, out_channels, weight_attr=w_attr, bias_attr=b_attr
            )
        else:
            # Two-stage projection through a bottleneck of mid_channels.
            w_attr1, b_attr1 = get_para_bias_attr(l2_decay=fc_decay, k=in_channels)
            self.fc1 = nn.Linear(
                in_channels,
                mid_channels,
                weight_attr=w_attr1,
                bias_attr=b_attr1,
            )
            w_attr2, b_attr2 = get_para_bias_attr(l2_decay=fc_decay, k=mid_channels)
            self.fc2 = nn.Linear(
                mid_channels,
                out_channels,
                weight_attr=w_attr2,
                bias_attr=b_attr2,
            )
        self.out_channels = out_channels
        self.mid_channels = mid_channels
        self.return_feats = return_feats

    def forward(self, x, targets=None):
        if self.mid_channels is None:
            predicts = self.fc(x)
        else:
            x = self.fc1(x)
            predicts = self.fc2(x)

        result = (x, predicts) if self.return_feats else predicts
        # Inference path: return probabilities only.
        if not self.training:
            predicts = F.softmax(predicts, axis=2)
            result = predicts
        return result
diff --git a/docling_ibm_models/slanet_1m/modeling/heads/rec_multi_head.py b/docling_ibm_models/slanet_1m/modeling/heads/rec_multi_head.py
new file mode 100644
index 0000000..a62ae40
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/heads/rec_multi_head.py
@@ -0,0 +1,152 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from modeling.necks.rnn import (
+ Im2Seq,
+ EncoderWithRNN,
+ EncoderWithFC,
+ SequenceEncoder,
+ EncoderWithSVTR,
+ trunc_normal_,
+ zeros_,
+)
+from .rec_ctc_head import CTCHead
+from .rec_nrtr_head import Transformer
+
+
class FCTranspose(nn.Layer):
    """Transpose (B, C, N) -> (B, N, C), optionally projecting the last
    axis from ``in_channels`` to ``out_channels`` with a bias-free FC."""

    def __init__(self, in_channels, out_channels, only_transpose=False):
        super().__init__()
        self.only_transpose = only_transpose
        if not only_transpose:
            self.fc = nn.Linear(in_channels, out_channels, bias_attr=False)

    def forward(self, x):
        out = x.transpose([0, 2, 1])
        if not self.only_transpose:
            out = self.fc(out)
        return out
+
+
class AddPos(nn.Layer):
    """Add a learnable positional embedding (truncated-normal init) of
    length ``w`` to the input sequence, sliced to the actual length."""

    def __init__(self, dim, w):
        super().__init__()
        pos = self.create_parameter(shape=[1, w, dim], default_initializer=zeros_)
        self.dec_pos_embed = pos
        # Re-register under the same name (kept from the original code).
        self.add_parameter("dec_pos_embed", self.dec_pos_embed)
        trunc_normal_(self.dec_pos_embed)

    def forward(self, x):
        # Slice the table to x's sequence length before the add.
        return x + self.dec_pos_embed[:, : x.shape[1], :]
+
+
class MultiHead(nn.Layer):
    """Composite recognition head: a CTC branch (always built) plus a GTC
    branch (SAR or NRTR) used only during training, configured by the
    ``head_list`` entries in ``kwargs``.

    In eval mode only the CTC output is returned; in train mode a dict
    with 'ctc', 'ctc_neck' and the GTC branch output is returned.
    """

    def __init__(self, in_channels, out_channels_list, **kwargs):
        super().__init__()
        self.head_list = kwargs.pop("head_list")
        self.use_pool = kwargs.get("use_pool", False)
        self.use_pos = kwargs.get("use_pos", False)
        self.in_channels = in_channels
        if self.use_pool:
            self.pool = nn.AvgPool2D(kernel_size=[3, 2], stride=[3, 2], padding=0)
        self.gtc_head = "sar"
        assert len(self.head_list) >= 2
        for idx, head_name in enumerate(self.head_list):
            name = list(head_name)[0]
            if name == "SARHead":
                # sar head
                sar_args = self.head_list[idx][name]
                # NOTE(review): SARHead is resolved dynamically and is not
                # imported in this module, so this eval() raises NameError
                # unless SARHead is injected into the module globals —
                # confirm whether this branch is reachable in any config.
                self.sar_head = eval(name)(
                    in_channels=in_channels,
                    out_channels=out_channels_list["SARLabelDecode"],
                    **sar_args,
                )
            elif name == "NRTRHead":
                gtc_args = self.head_list[idx][name]
                max_text_length = gtc_args.get("max_text_length", 25)
                nrtr_dim = gtc_args.get("nrtr_dim", 256)
                num_decoder_layers = gtc_args.get("num_decoder_layers", 4)
                # Flatten the feature map to a sequence and project to the
                # transformer width (optionally adding positions).
                if self.use_pos:
                    self.before_gtc = nn.Sequential(
                        nn.Flatten(2),
                        FCTranspose(in_channels, nrtr_dim),
                        AddPos(nrtr_dim, 80),
                    )
                else:
                    self.before_gtc = nn.Sequential(
                        nn.Flatten(2), FCTranspose(in_channels, nrtr_dim)
                    )

                # Decoder-only transformer (num_encoder_layers=-1 disables
                # the encoder; beam_size=-1 selects greedy decode).
                self.gtc_head = Transformer(
                    d_model=nrtr_dim,
                    nhead=nrtr_dim // 32,
                    num_encoder_layers=-1,
                    beam_size=-1,
                    num_decoder_layers=num_decoder_layers,
                    max_len=max_text_length,
                    dim_feedforward=nrtr_dim * 4,
                    out_channels=out_channels_list["NRTRLabelDecode"],
                )
            elif name == "CTCHead":
                # ctc neck
                self.encoder_reshape = Im2Seq(in_channels)
                neck_args = self.head_list[idx][name]["Neck"]
                encoder_type = neck_args.pop("name")
                self.ctc_encoder = SequenceEncoder(
                    in_channels=in_channels, encoder_type=encoder_type, **neck_args
                )
                # ctc head — name is guaranteed to be "CTCHead" here, so use
                # the imported class directly instead of eval().
                head_args = self.head_list[idx][name]["Head"]
                self.ctc_head = CTCHead(
                    in_channels=self.ctc_encoder.out_channels,
                    out_channels=out_channels_list["CTCLabelDecode"],
                    **head_args,
                )
            else:
                raise NotImplementedError(
                    "{} is not supported in MultiHead yet".format(name)
                )

    def forward(self, x, targets=None):
        if self.use_pool:
            x = self.pool(
                x.reshape([0, 3, -1, self.in_channels]).transpose([0, 3, 1, 2])
            )
        ctc_encoder = self.ctc_encoder(x)
        ctc_out = self.ctc_head(ctc_encoder, targets)
        head_out = dict()
        head_out["ctc"] = ctc_out
        head_out["ctc_neck"] = ctc_encoder
        # eval mode: inference uses only the CTC branch
        if not self.training:
            return ctc_out
        if self.gtc_head == "sar":
            sar_out = self.sar_head(x, targets[1:])
            head_out["sar"] = sar_out
        else:
            gtc_out = self.gtc_head(self.before_gtc(x), targets[1:])
            head_out["gtc"] = gtc_out
        return head_out
diff --git a/docling_ibm_models/slanet_1m/modeling/heads/rec_nrtr_head.py b/docling_ibm_models/slanet_1m/modeling/heads/rec_nrtr_head.py
new file mode 100644
index 0000000..b13a849
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/heads/rec_nrtr_head.py
@@ -0,0 +1,704 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import Dropout, LayerNorm
+import numpy as np
+from modeling.backbones.rec_svtrnet import Mlp, zeros_
+from paddle.nn.initializer import XavierNormal as xavier_normal_
+
+
class Transformer(nn.Layer):
    """A transformer model. User is able to modify the attributes as needed.
    The architecture is based on the paper "Attention Is All You Need".
    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
    Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is
    all you need. In Advances in Neural Information Processing Systems,
    pages 6000-6010.

    Special token ids used throughout: 0 = padding (``padding_idx=0``),
    2 = start symbol (seeds decoding), 3 = end symbol (stops decoding).

    Args:
        d_model: the number of expected features in the encoder/decoder inputs (default=512).
        nhead: the number of heads in the multiheadattention models (default=8).
        num_encoder_layers: the number of sub-encoder-layers in the encoder
            (default=6); <= 0 disables the encoder entirely.
        beam_size: beam width for inference; <= 0 selects greedy decoding.
        num_decoder_layers: the number of sub-decoder-layers in the decoder (default=6).
        max_len: maximum decoded sequence length.
        dim_feedforward: the dimension of the feedforward network model (default=1024).
        attention_dropout_rate: dropout inside attention.
        residual_dropout_rate: dropout on residual/positional paths (default=0.1).
        out_channels: vocabulary size; one extra slot is added internally.
        scale_embedding: scale embeddings by sqrt(d_model) if True.
    """

    def __init__(
        self,
        d_model=512,
        nhead=8,
        num_encoder_layers=6,
        beam_size=0,
        num_decoder_layers=6,
        max_len=25,
        dim_feedforward=1024,
        attention_dropout_rate=0.0,
        residual_dropout_rate=0.1,
        in_channels=0,
        out_channels=0,
        scale_embedding=True,
    ):
        super(Transformer, self).__init__()
        # +1 reserves an extra class slot on top of the label vocabulary.
        self.out_channels = out_channels + 1
        self.max_len = max_len
        self.embedding = Embeddings(
            d_model=d_model,
            vocab=self.out_channels,
            padding_idx=0,
            scale_embedding=scale_embedding,
        )
        self.positional_encoding = PositionalEncoding(
            dropout=residual_dropout_rate, dim=d_model
        )

        # Encoder is optional: num_encoder_layers <= 0 (e.g. -1 from
        # MultiHead) means the source features are used as memory directly.
        if num_encoder_layers > 0:
            self.encoder = nn.LayerList(
                [
                    TransformerBlock(
                        d_model,
                        nhead,
                        dim_feedforward,
                        attention_dropout_rate,
                        residual_dropout_rate,
                        with_self_attn=True,
                        with_cross_attn=False,
                    )
                    for i in range(num_encoder_layers)
                ]
            )
        else:
            self.encoder = None

        # Decoder blocks: masked self-attention + cross-attention on memory.
        self.decoder = nn.LayerList(
            [
                TransformerBlock(
                    d_model,
                    nhead,
                    dim_feedforward,
                    attention_dropout_rate,
                    residual_dropout_rate,
                    with_self_attn=True,
                    with_cross_attn=True,
                )
                for i in range(num_decoder_layers)
            ]
        )

        self.beam_size = beam_size
        self.d_model = d_model
        self.nhead = nhead
        # Output projection to the vocabulary, initialised N(0, d_model^-0.5).
        self.tgt_word_prj = nn.Linear(d_model, self.out_channels, bias_attr=False)
        w0 = np.random.normal(
            0.0, d_model**-0.5, (d_model, self.out_channels)
        ).astype(np.float32)
        self.tgt_word_prj.weight.set_value(w0)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Xavier-normal for Linear weights, zeros for biases; applied to all
        # sublayers via self.apply (overwrites the N(0, ...) init of
        # tgt_word_prj set above, since apply() runs afterwards).
        if isinstance(m, nn.Linear):
            xavier_normal_(m.weight)
            if m.bias is not None:
                zeros_(m.bias)

    def forward_train(self, src, tgt):
        # Teacher forcing: feed the target shifted right (drop last token).
        tgt = tgt[:, :-1]

        tgt = self.embedding(tgt)
        tgt = self.positional_encoding(tgt)
        # Causal mask so position i cannot attend to positions > i.
        tgt_mask = self.generate_square_subsequent_mask(tgt.shape[1])

        if self.encoder is not None:
            src = self.positional_encoding(src)
            for encoder_layer in self.encoder:
                src = encoder_layer(src)
            memory = src  # B N C
        else:
            memory = src  # B N C
        for decoder_layer in self.decoder:
            tgt = decoder_layer(tgt, memory, self_mask=tgt_mask)
        output = tgt
        logit = self.tgt_word_prj(output)
        return logit

    def forward(self, src, targets=None):
        """Take in and process masked source/target sequences.

        Args:
            src: the sequence to the encoder, shape (B, sN, C).
            targets: training only — [token ids, lengths]; unused at inference.

        Returns:
            Training: logits (B, tN, out_channels). Inference: a
            [sequence ids, probabilities] pair from greedy or beam decoding.
        """

        if self.training:
            # Trim the batch to the longest target (+2 for start/end tokens).
            max_len = targets[1].max()
            tgt = targets[0][:, : 2 + max_len]
            return self.forward_train(src, tgt)
        else:
            if self.beam_size > 0:
                return self.forward_beam(src)
            else:
                return self.forward_test(src)

    def forward_test(self, src):
        # Greedy autoregressive decoding.
        bs = src.shape[0]
        if self.encoder is not None:
            src = self.positional_encoding(src)
            for encoder_layer in self.encoder:
                src = encoder_layer(src)
            memory = src  # B N C
        else:
            memory = src
        # Seed every sequence with the start symbol (id 2), prob 1.0.
        dec_seq = paddle.full((bs, 1), 2, dtype=paddle.int64)
        dec_prob = paddle.full((bs, 1), 1.0, dtype=paddle.float32)
        # NOTE(review): range() over a paddle Tensor bound relies on the
        # Tensor being usable as an integer index — confirm on the target
        # paddle version.
        for len_dec_seq in range(1, paddle.to_tensor(self.max_len)):
            dec_seq_embed = self.embedding(dec_seq)
            dec_seq_embed = self.positional_encoding(dec_seq_embed)
            tgt_mask = self.generate_square_subsequent_mask(dec_seq_embed.shape[1])
            tgt = dec_seq_embed
            for decoder_layer in self.decoder:
                tgt = decoder_layer(tgt, memory, self_mask=tgt_mask)
            dec_output = tgt
            # Only the last step's distribution is needed for the next token.
            dec_output = dec_output[:, -1, :]
            word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=-1)
            preds_idx = paddle.argmax(word_prob, axis=-1)
            # Stop once the whole batch predicts the end symbol (id 3).
            if paddle.equal_all(
                preds_idx, paddle.full(preds_idx.shape, 3, dtype="int64")
            ):
                break
            preds_prob = paddle.max(word_prob, axis=-1)
            dec_seq = paddle.concat(
                [dec_seq, paddle.reshape(preds_idx, [-1, 1])], axis=1
            )
            dec_prob = paddle.concat(
                [dec_prob, paddle.reshape(preds_prob, [-1, 1])], axis=1
            )
        return [dec_seq, dec_prob]

    def forward_beam(self, images):
        """Translation work in one batch.

        NOTE(review): the beam bookkeeping below assumes a single instance
        (``range(1)``) and pads hypotheses to 25 tokens regardless of
        ``self.max_len`` — verify against callers before changing.
        """

        def get_inst_idx_to_tensor_position_map(inst_idx_list):
            """Indicate the position of an instance in a tensor."""
            return {
                inst_idx: tensor_position
                for tensor_position, inst_idx in enumerate(inst_idx_list)
            }

        def collect_active_part(
            beamed_tensor, curr_active_inst_idx, n_prev_active_inst, n_bm
        ):
            """Collect tensor parts associated to active instances."""

            beamed_tensor_shape = beamed_tensor.shape
            n_curr_active_inst = len(curr_active_inst_idx)
            new_shape = (
                n_curr_active_inst * n_bm,
                beamed_tensor_shape[1],
                beamed_tensor_shape[2],
            )

            # Flatten per-instance beams, keep only active rows, restore shape.
            beamed_tensor = beamed_tensor.reshape([n_prev_active_inst, -1])
            beamed_tensor = beamed_tensor.index_select(curr_active_inst_idx, axis=0)
            beamed_tensor = beamed_tensor.reshape(new_shape)

            return beamed_tensor

        def collate_active_info(
            src_enc, inst_idx_to_position_map, active_inst_idx_list
        ):
            # Sentences which are still active are collected,
            # so the decoder will not run on completed sentences.

            n_prev_active_inst = len(inst_idx_to_position_map)
            active_inst_idx = [
                inst_idx_to_position_map[k] for k in active_inst_idx_list
            ]
            active_inst_idx = paddle.to_tensor(active_inst_idx, dtype="int64")
            active_src_enc = collect_active_part(
                src_enc.transpose([1, 0, 2]), active_inst_idx, n_prev_active_inst, n_bm
            ).transpose([1, 0, 2])
            active_inst_idx_to_position_map = get_inst_idx_to_tensor_position_map(
                active_inst_idx_list
            )
            return active_src_enc, active_inst_idx_to_position_map

        def beam_decode_step(
            inst_dec_beams, len_dec_seq, enc_output, inst_idx_to_position_map, n_bm
        ):
            """Decode and update beam status, and then return active beam idx"""

            def prepare_beam_dec_seq(inst_dec_beams, len_dec_seq):
                # Stack the partial hypotheses of all unfinished beams.
                dec_partial_seq = [
                    b.get_current_state() for b in inst_dec_beams if not b.done
                ]
                dec_partial_seq = paddle.stack(dec_partial_seq)
                dec_partial_seq = dec_partial_seq.reshape([-1, len_dec_seq])
                return dec_partial_seq

            def predict_word(dec_seq, enc_output, n_active_inst, n_bm):
                # One decoder pass over all active beams at once.
                dec_seq = self.embedding(dec_seq)
                dec_seq = self.positional_encoding(dec_seq)
                tgt_mask = self.generate_square_subsequent_mask(dec_seq.shape[1])
                tgt = dec_seq
                for decoder_layer in self.decoder:
                    tgt = decoder_layer(tgt, enc_output, self_mask=tgt_mask)
                dec_output = tgt
                dec_output = dec_output[:, -1, :]  # Pick the last step: (bh * bm) * d_h
                word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1)
                word_prob = paddle.reshape(word_prob, [n_active_inst, n_bm, -1])
                return word_prob

            def collect_active_inst_idx_list(
                inst_beams, word_prob, inst_idx_to_position_map
            ):
                # Advance each beam; keep only those not yet finished.
                active_inst_idx_list = []
                for inst_idx, inst_position in inst_idx_to_position_map.items():
                    is_inst_complete = inst_beams[inst_idx].advance(
                        word_prob[inst_position]
                    )
                    if not is_inst_complete:
                        active_inst_idx_list += [inst_idx]

                return active_inst_idx_list

            n_active_inst = len(inst_idx_to_position_map)
            dec_seq = prepare_beam_dec_seq(inst_dec_beams, len_dec_seq)
            word_prob = predict_word(dec_seq, enc_output, n_active_inst, n_bm)
            # Update the beam with predicted word prob information and collect incomplete instances
            active_inst_idx_list = collect_active_inst_idx_list(
                inst_dec_beams, word_prob, inst_idx_to_position_map
            )
            return active_inst_idx_list

        def collect_hypothesis_and_scores(inst_dec_beams, n_best):
            # Gather the n_best hypotheses and scores from every beam.
            all_hyp, all_scores = [], []
            for inst_idx in range(len(inst_dec_beams)):
                scores, tail_idxs = inst_dec_beams[inst_idx].sort_scores()
                all_scores += [scores[:n_best]]
                hyps = [
                    inst_dec_beams[inst_idx].get_hypothesis(i)
                    for i in tail_idxs[:n_best]
                ]
                all_hyp += [hyps]
            return all_hyp, all_scores

        with paddle.no_grad():
            # -- Encode
            if self.encoder is not None:
                src = self.positional_encoding(images)
                src_enc = self.encoder(src)
            else:
                src_enc = images

            n_bm = self.beam_size
            src_shape = src_enc.shape
            inst_dec_beams = [Beam(n_bm) for _ in range(1)]
            active_inst_idx_list = list(range(1))
            # Repeat data for beam search
            src_enc = paddle.tile(src_enc, [1, n_bm, 1])
            inst_idx_to_position_map = get_inst_idx_to_tensor_position_map(
                active_inst_idx_list
            )
            # Decode
            for len_dec_seq in range(1, paddle.to_tensor(self.max_len)):
                src_enc_copy = src_enc.clone()
                active_inst_idx_list = beam_decode_step(
                    inst_dec_beams,
                    len_dec_seq,
                    src_enc_copy,
                    inst_idx_to_position_map,
                    n_bm,
                )
                if not active_inst_idx_list:
                    break  # all instances have finished their path to
                src_enc, inst_idx_to_position_map = collate_active_info(
                    src_enc_copy, inst_idx_to_position_map, active_inst_idx_list
                )
            batch_hyp, batch_scores = collect_hypothesis_and_scores(inst_dec_beams, 1)
            result_hyp = []
            hyp_scores = []
            for bs_hyp, score in zip(batch_hyp, batch_scores):
                l = len(bs_hyp[0])
                # Pad to fixed length 25 with the end symbol (id 3); the
                # length-normalised score is broadcast to every position.
                bs_hyp_pad = bs_hyp[0] + [3] * (25 - l)
                result_hyp.append(bs_hyp_pad)
                score = float(score) / l
                hyp_score = [score for _ in range(25)]
                hyp_scores.append(hyp_score)
            return [
                paddle.to_tensor(np.array(result_hyp), dtype=paddle.int64),
                paddle.to_tensor(hyp_scores),
            ]

    def generate_square_subsequent_mask(self, sz):
        """Generate a square mask for the sequence. The masked positions are filled with float('-inf').
        Unmasked positions are filled with float(0.0).
        """
        mask = paddle.zeros([sz, sz], dtype="float32")
        mask_inf = paddle.triu(
            paddle.full(shape=[sz, sz], dtype="float32", fill_value="-inf"), diagonal=1
        )
        mask = mask + mask_inf
        # Shape (1, 1, sz, sz) so it broadcasts over batch and heads.
        return mask.unsqueeze([0, 1])
+
+
class MultiheadAttention(nn.Layer):
    """Multi-head scaled dot-product attention (Attention Is All You Need).

    With ``self_attn=True`` a fused QKV projection over the query is used;
    otherwise Q is projected from ``query`` and K/V jointly from ``key``.

    Args:
        embed_dim: total dimension of the model.
        num_heads: number of parallel attention heads.
        dropout: dropout applied to the attention weights.
        self_attn: select fused-QKV self-attention vs. cross-attention.
    """

    def __init__(self, embed_dim, num_heads, dropout=0.0, self_attn=False):
        super(MultiheadAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        assert (
            self.head_dim * num_heads == self.embed_dim
        ), "embed_dim must be divisible by num_heads"
        self.scale = self.head_dim**-0.5
        self.self_attn = self_attn
        if self_attn:
            self.qkv = nn.Linear(embed_dim, embed_dim * 3)
        else:
            self.q = nn.Linear(embed_dim, embed_dim)
            self.kv = nn.Linear(embed_dim, embed_dim * 2)
        self.attn_drop = nn.Dropout(dropout)
        self.out_proj = nn.Linear(embed_dim, embed_dim)

    def forward(self, query, key=None, attn_mask=None):
        q_len = query.shape[1]

        # Project to per-head (B, heads, N, head_dim) tensors. A leading 0
        # in paddle reshape keeps that dimension (the batch) unchanged.
        if self.self_attn:
            qkv = (
                self.qkv(query)
                .reshape((0, q_len, 3, self.num_heads, self.head_dim))
                .transpose((2, 0, 3, 1, 4))
            )
            q, k, v = qkv[0], qkv[1], qkv[2]
        else:
            k_len = key.shape[1]
            q = (
                self.q(query)
                .reshape([0, q_len, self.num_heads, self.head_dim])
                .transpose([0, 2, 1, 3])
            )
            kv = (
                self.kv(key)
                .reshape((0, k_len, 2, self.num_heads, self.head_dim))
                .transpose((2, 0, 3, 1, 4))
            )
            k, v = kv[0], kv[1]

        # Scaled dot-product scores, optional additive mask, softmax, dropout.
        scores = q.matmul(k.transpose((0, 1, 3, 2))) * self.scale
        if attn_mask is not None:
            scores += attn_mask
        weights = self.attn_drop(F.softmax(scores, axis=-1))

        # Merge heads back to (B, N, embed_dim) and apply the output proj.
        merged = (
            weights.matmul(v).transpose((0, 2, 1, 3)).reshape((0, q_len, self.embed_dim))
        )
        return self.out_proj(merged)
+
+
class TransformerBlock(nn.Layer):
    """Transformer layer: optional self-attention, optional cross-attention
    over ``memory``, then an MLP — each followed by residual + LayerNorm."""

    def __init__(
        self,
        d_model,
        nhead,
        dim_feedforward=2048,
        attention_dropout_rate=0.0,
        residual_dropout_rate=0.1,
        with_self_attn=True,
        with_cross_attn=False,
        epsilon=1e-5,
    ):
        super(TransformerBlock, self).__init__()
        self.with_self_attn = with_self_attn
        if with_self_attn:
            self.self_attn = MultiheadAttention(
                d_model, nhead, dropout=attention_dropout_rate, self_attn=with_self_attn
            )
            self.norm1 = LayerNorm(d_model, epsilon=epsilon)
            self.dropout1 = Dropout(residual_dropout_rate)
        self.with_cross_attn = with_cross_attn
        if with_cross_attn:
            # Cross-attention over encoder memory (decoder configuration).
            self.cross_attn = MultiheadAttention(
                d_model, nhead, dropout=attention_dropout_rate
            )
            self.norm2 = LayerNorm(d_model, epsilon=epsilon)
            self.dropout2 = Dropout(residual_dropout_rate)

        self.mlp = Mlp(
            in_features=d_model,
            hidden_features=dim_feedforward,
            act_layer=nn.ReLU,
            drop=residual_dropout_rate,
        )
        self.norm3 = LayerNorm(d_model, epsilon=epsilon)
        self.dropout3 = Dropout(residual_dropout_rate)

    def forward(self, tgt, memory=None, self_mask=None, cross_mask=None):
        out = tgt
        if self.with_self_attn:
            attn_out = self.self_attn(out, attn_mask=self_mask)
            out = self.norm1(out + self.dropout1(attn_out))
        if self.with_cross_attn:
            cross_out = self.cross_attn(out, key=memory, attn_mask=cross_mask)
            out = self.norm2(out + self.dropout2(cross_out))
        return self.norm3(out + self.dropout3(self.mlp(out)))
+
+
class PositionalEncoding(nn.Layer):
    """Sinusoidal positional encoding added to token embeddings, followed
    by dropout (Vaswani et al., 2017).

    Args:
        dropout: dropout probability applied after adding the encoding.
        dim: the embedding dimension.
        max_len: maximum supported sequence length (default=5000).
    """

    def __init__(self, dropout, dim, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Precompute a (max_len, 1, dim) sin/cos table once at build time.
        table = paddle.zeros([max_len, dim])
        positions = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1)
        freqs = paddle.exp(
            paddle.arange(0, dim, 2).astype("float32") * (-math.log(10000.0) / dim)
        )
        table[:, 0::2] = paddle.sin(positions * freqs)
        table[:, 1::2] = paddle.cos(positions * freqs)
        table = paddle.transpose(paddle.unsqueeze(table, 0), [1, 0, 2])
        self.register_buffer("pe", table)

    def forward(self, x):
        """Add positional encodings to ``x`` of shape (batch, seq, dim)
        and apply dropout; shape is preserved."""
        seq_first = x.transpose([1, 0, 2])
        seq_first = seq_first + self.pe[: seq_first.shape[0], :]
        return self.dropout(seq_first).transpose([1, 0, 2])
+
+
class PositionalEncoding_2d(nn.Layer):
    """2-D positional encoding for feature maps: separate width and height
    sinusoidal encodings — each gated by a learned scale derived from a
    global average pool of the input — are added to the input, which is
    then flattened to a (H*W, B, C) sequence.

    Args:
        dropout: dropout probability applied to the output.
        dim: channel dimension of the feature map.
        max_len: maximum supported height/width (default=5000).
    """

    def __init__(self, dropout, dim, max_len=5000):
        super(PositionalEncoding_2d, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Standard 1-D sin/cos table, reused for both axes.
        pe = paddle.zeros([max_len, dim])
        position = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1)
        div_term = paddle.exp(
            paddle.arange(0, dim, 2).astype("float32") * (-math.log(10000.0) / dim)
        )
        pe[:, 0::2] = paddle.sin(position * div_term)
        pe[:, 1::2] = paddle.cos(position * div_term)
        pe = paddle.transpose(paddle.unsqueeze(pe, 0), [1, 0, 2])
        self.register_buffer("pe", pe)

        self.avg_pool_1 = nn.AdaptiveAvgPool2D((1, 1))
        self.linear1 = nn.Linear(dim, dim)
        # NOTE(review): `.data.fill_` is a torch-style idiom; confirm it is
        # valid for paddle parameters on the targeted paddle version.
        self.linear1.weight.data.fill_(1.0)
        self.avg_pool_2 = nn.AdaptiveAvgPool2D((1, 1))
        self.linear2 = nn.Linear(dim, dim)
        self.linear2.weight.data.fill_(1.0)

    def forward(self, x):
        """Add 2-D positional encodings to ``x`` (assumed (B, C, H, W) —
        TODO confirm against callers) and return a (H*W, B, C) sequence."""
        # Width-axis encoding, gated by a pooled scale from the input.
        w_pe = self.pe[: x.shape[-1], :]
        w1 = self.linear1(self.avg_pool_1(x).squeeze()).unsqueeze(0)
        w_pe = w_pe * w1
        w_pe = paddle.transpose(w_pe, [1, 2, 0])
        w_pe = paddle.unsqueeze(w_pe, 2)

        # Height-axis encoding.
        # BUG FIX: original read `x.shape.shape[-2]` — a shape list has no
        # `.shape` attribute, so forward() always raised. Use the height
        # dimension of x directly.
        h_pe = self.pe[: x.shape[-2], :]
        w2 = self.linear2(self.avg_pool_2(x).squeeze()).unsqueeze(0)
        h_pe = h_pe * w2
        h_pe = paddle.transpose(h_pe, [1, 2, 0])
        h_pe = paddle.unsqueeze(h_pe, 3)

        # Broadcast-add both encodings, then flatten H*W and move the
        # sequence axis first: (B, C, H, W) -> (H*W, B, C).
        x = x + w_pe + h_pe
        x = paddle.transpose(
            paddle.reshape(x, [x.shape[0], x.shape[1], x.shape[2] * x.shape[3]]),
            [2, 0, 1],
        )

        return self.dropout(x)
+
+
class Embeddings(nn.Layer):
    """Token embedding table initialised N(0, d_model^-0.5), optionally
    scaled by sqrt(d_model) as in the original Transformer."""

    def __init__(self, d_model, vocab, padding_idx=None, scale_embedding=True):
        super(Embeddings, self).__init__()
        self.embedding = nn.Embedding(vocab, d_model, padding_idx=padding_idx)
        init_w = np.random.normal(0.0, d_model**-0.5, (vocab, d_model)).astype(
            np.float32
        )
        self.embedding.weight.set_value(init_w)
        self.d_model = d_model
        self.scale_embedding = scale_embedding

    def forward(self, x):
        emb = self.embedding(x)
        if self.scale_embedding:
            emb = emb * math.sqrt(self.d_model)
        return emb
+
+
class Beam:
    """Beam-search bookkeeping for a single source instance.

    Token id 2 seeds the beam (start symbol) and id 3 terminates it (end
    symbol), matching the ids used by Transformer's decoding paths.
    Hypotheses are stored as per-step top-k choices (``next_ys``) plus
    backpointers to the originating beam (``prev_ks``).
    """

    def __init__(self, size, device=False):
        # size: beam width. `device` is accepted but unused here.
        self.size = size
        self._done = False
        # The score for each translation on the beam.
        self.scores = paddle.zeros((size,), dtype=paddle.float32)
        self.all_scores = []
        # The backpointers at each time-step.
        self.prev_ks = []
        # The outputs at each time-step.
        self.next_ys = [paddle.full((size,), 0, dtype=paddle.int64)]
        # Only the first slot starts with the start symbol (id 2).
        self.next_ys[0][0] = 2

    def get_current_state(self):
        "Get the decoded token sequences of the beam for the current timestep."
        return self.get_tentative_hypothesis()

    def get_current_origin(self):
        "Get the backpointers for the current timestep."
        return self.prev_ks[-1]

    @property
    def done(self):
        # True once the best-ranked hypothesis has emitted the end symbol.
        return self._done

    def advance(self, word_prob):
        "Update beam status with step probabilities; return whether finished."
        num_words = word_prob.shape[1]

        # Sum the previous scores (first step: only slot 0 is live, so use
        # its row alone rather than broadcasting over empty beams).
        if len(self.prev_ks) > 0:
            beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob)
        else:
            beam_lk = word_prob[0]

        flat_beam_lk = beam_lk.reshape([-1])
        best_scores, best_scores_id = flat_beam_lk.topk(
            self.size, 0, True, True
        )  # 1st sort
        self.all_scores.append(self.scores)
        self.scores = best_scores
        # bestScoresId is flattened as a (beam x word) array,
        # so we need to calculate which word and beam each score came from
        prev_k = best_scores_id // num_words
        self.prev_ks.append(prev_k)
        self.next_ys.append(best_scores_id - prev_k * num_words)
        # End condition is when top-of-beam is EOS (token id 3).
        if self.next_ys[-1][0] == 3:
            self._done = True
            self.all_scores.append(self.scores)

        return self._done

    def sort_scores(self):
        # NOTE(review): despite the name, this returns the scores unsorted
        # with identity indices (topk in advance() already ordered them) —
        # verify before relying on a true sort here.
        "Sort the scores."
        return self.scores, paddle.to_tensor(
            [i for i in range(int(self.scores.shape[0]))], dtype="int32"
        )

    def get_the_best_score_and_idx(self):
        # NOTE(review): returns index 1, i.e. the second-ranked entry —
        # looks suspicious; confirm against callers before changing.
        "Get the score of the best in the beam."
        scores, ids = self.sort_scores()
        return scores[1], ids[1]

    def get_tentative_hypothesis(self):
        "Get the decoded sequence for the current timestep."
        if len(self.next_ys) == 1:
            # No steps taken yet: just the start-symbol column.
            dec_seq = self.next_ys[0].unsqueeze(1)
        else:
            _, keys = self.sort_scores()
            hyps = [self.get_hypothesis(k) for k in keys]
            # Prepend the start symbol (id 2) to every hypothesis.
            hyps = [[2] + h for h in hyps]
            dec_seq = paddle.to_tensor(hyps, dtype="int64")
        return dec_seq

    def get_hypothesis(self, k):
        """Walk back through the backpointers to reconstruct hypothesis ``k``."""
        hyp = []
        for j in range(len(self.prev_ks) - 1, -1, -1):
            hyp.append(self.next_ys[j + 1][k])
            k = self.prev_ks[j][k]
        # Reverse to chronological order and unbox the tensor scalars.
        return list(map(lambda x: x.item(), hyp[::-1]))
diff --git a/docling_ibm_models/slanet_1m/modeling/heads/table_att_head.py b/docling_ibm_models/slanet_1m/modeling/heads/table_att_head.py
new file mode 100644
index 0000000..4202283
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/heads/table_att_head.py
@@ -0,0 +1,413 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+import paddle.nn as nn
+from paddle import ParamAttr
+import paddle.nn.functional as F
+import numpy as np
+
+from .rec_att_head import AttentionGRUCell
+from modeling.backbones.rec_svtrnet import DropPath, Identity, Mlp
+
+
def get_para_bias_attr(l2_decay, k):
    """Build a matching (weight_attr, bias_attr) pair of ParamAttr objects.

    When ``l2_decay`` is positive, both attributes carry an L2 regularizer and
    a uniform initializer bounded by 1/sqrt(k); otherwise both fields are left
    as ``None`` so paddle's defaults apply.
    """
    if l2_decay > 0:
        regularizer = paddle.regularizer.L2Decay(l2_decay)
        bound = 1.0 / math.sqrt(k * 1.0)
        initializer = nn.initializer.Uniform(-bound, bound)
    else:
        regularizer, initializer = None, None
    attrs = [
        ParamAttr(regularizer=regularizer, initializer=initializer)
        for _ in range(2)
    ]
    return attrs
+
+
class TableAttentionHead(nn.Layer):
    """Attention-GRU table recognition head.

    Decodes the last backbone feature map one step at a time, producing a
    structure-token distribution and a per-step box regression
    (``loc_reg_num`` values, passed through sigmoid).
    """

    def __init__(
        self,
        in_channels,
        hidden_size,
        in_max_len=488,
        max_text_length=800,
        out_channels=30,
        loc_reg_num=4,
        **kwargs,
    ):
        """
        @param in_channels: backbone output channels; only the last entry is used
        @param hidden_size: hidden size of the attention GRU cell
        @param in_max_len: expected input max side; selects the loc projection width
        @param max_text_length: maximum number of decode steps
        @param out_channels: number of structure-token classes
        @param loc_reg_num: regression values per box (e.g. 4 or 8)
        """
        super(TableAttentionHead, self).__init__()
        self.input_size = in_channels[-1]
        self.hidden_size = hidden_size
        self.out_channels = out_channels
        self.max_text_length = max_text_length

        self.structure_attention_cell = AttentionGRUCell(
            self.input_size, hidden_size, self.out_channels, use_gru=False
        )
        self.structure_generator = nn.Linear(hidden_size, self.out_channels)
        self.in_max_len = in_max_len

        # The flattened spatial length of the feature map depends on input
        # resolution, so the loc-feature projection is sized per resolution.
        if self.in_max_len == 640:
            self.loc_fea_trans = nn.Linear(400, self.max_text_length + 1)
        elif self.in_max_len == 800:
            self.loc_fea_trans = nn.Linear(625, self.max_text_length + 1)
        else:
            self.loc_fea_trans = nn.Linear(256, self.max_text_length + 1)
        self.loc_generator = nn.Linear(self.input_size + hidden_size, loc_reg_num)

    def _char_to_onehot(self, input_char, onehot_dim):
        # One-hot encode the previous structure token as GRU-cell input.
        input_ont_hot = F.one_hot(input_char, onehot_dim)
        return input_ont_hot

    def forward(self, inputs, targets=None):
        """Run teacher-forced (training) or greedy (inference) decoding.

        Returns a dict with 'structure_probs' (token distributions; softmaxed
        only at inference) and 'loc_preds' (sigmoid box regressions).
        """
        # if and else branch are both needed when you want to assign a variable
        # if you modify the var in just one branch, then the modification will not work.
        fea = inputs[-1]
        last_shape = int(np.prod(fea.shape[2:]))  # flattened spatial size (H*W)
        fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last_shape])
        fea = fea.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
        batch_size = fea.shape[0]

        hidden = paddle.zeros((batch_size, self.hidden_size))
        output_hiddens = paddle.zeros(
            (batch_size, self.max_text_length + 1, self.hidden_size)
        )
        if self.training and targets is not None:
            # Teacher forcing: feed ground-truth structure tokens at every step.
            structure = targets[0]
            for i in range(self.max_text_length + 1):
                elem_onehots = self._char_to_onehot(
                    structure[:, i], onehot_dim=self.out_channels
                )
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots
                )
                output_hiddens[:, i, :] = outputs
            structure_probs = self.structure_generator(output_hiddens)
            # Project spatial features to one loc feature per decode step and
            # concatenate with the GRU hidden states for box regression.
            loc_fea = fea.transpose([0, 2, 1])
            loc_fea = self.loc_fea_trans(loc_fea)
            loc_fea = loc_fea.transpose([0, 2, 1])
            loc_concat = paddle.concat([output_hiddens, loc_fea], axis=2)
            loc_preds = self.loc_generator(loc_concat)
            loc_preds = F.sigmoid(loc_preds)
        else:
            # Greedy decoding: feed back the argmax token from the last step.
            temp_elem = paddle.zeros(shape=[batch_size], dtype="int32")
            structure_probs = None
            loc_preds = None
            elem_onehots = None
            outputs = None
            alpha = None
            # Wrapped in a tensor so the loop bound is traceable for export.
            max_text_length = paddle.to_tensor(self.max_text_length)
            for i in range(max_text_length + 1):
                elem_onehots = self._char_to_onehot(
                    temp_elem, onehot_dim=self.out_channels
                )
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots
                )
                output_hiddens[:, i, :] = outputs
                structure_probs_step = self.structure_generator(outputs)
                temp_elem = structure_probs_step.argmax(axis=1, dtype="int32")

            structure_probs = self.structure_generator(output_hiddens)
            structure_probs = F.softmax(structure_probs)
            loc_fea = fea.transpose([0, 2, 1])
            loc_fea = self.loc_fea_trans(loc_fea)
            loc_fea = loc_fea.transpose([0, 2, 1])
            loc_concat = paddle.concat([output_hiddens, loc_fea], axis=2)
            loc_preds = self.loc_generator(loc_concat)
            loc_preds = F.sigmoid(loc_preds)
        return {"structure_probs": structure_probs, "loc_preds": loc_preds}
+
+
class HWAttention(nn.Layer):
    """Multi-head scaled-dot-product attention over a fused qkv tensor.

    The input is the concatenation of q, k and v along the last axis, i.e.
    shape (B, N, 3 * C); the output has shape (B, N, C).
    """

    def __init__(
        self,
        head_dim=32,
        qk_scale=None,
        attn_drop=0.0,
    ):
        """
        @param head_dim: channel size of each attention head
        @param qk_scale: optional override for the default 1/sqrt(head_dim) scale
        @param attn_drop: dropout rate applied to the attention weights
        """
        super().__init__()
        self.head_dim = head_dim
        self.scale = qk_scale or self.head_dim**-0.5
        self.attn_drop = nn.Dropout(attn_drop)

    def forward(self, x):
        B, N, C = x.shape
        C = C // 3  # per-tensor channel count after splitting q/k/v
        qkv = x.reshape([B, N, 3, C // self.head_dim, self.head_dim]).transpose(
            [2, 0, 3, 1, 4]
        )
        q, k, v = qkv.unbind(0)  # each (B, num_heads, N, head_dim)
        attn = q @ k.transpose([0, 1, 3, 2]) * self.scale
        attn = F.softmax(attn, -1)
        attn = self.attn_drop(attn)
        x = attn @ v
        # BUGFIX: x is (B, num_heads, N, head_dim); move the sequence axis
        # ahead of the heads before merging heads back into channels. The
        # previous perm [0, 2, 1] had only three entries for this 4-D tensor,
        # which paddle's transpose rejects.
        x = x.transpose([0, 2, 1, 3]).reshape([B, N, C])
        return x
+
+
def img2windows(img, H_sp, W_sp):
    """Split a (B, H, W, C) tensor into non-overlapping H_sp x W_sp windows.

    Returns a tensor of shape (B * H/H_sp * W/W_sp, H_sp * W_sp, C), with the
    windows ordered row-major over the (H/H_sp, W/W_sp) grid.
    """
    B, H, W, C = img.shape
    tiled = img.reshape([B, H // H_sp, H_sp, W // W_sp, W_sp, C])
    windows = tiled.transpose([0, 1, 3, 2, 4, 5])
    return windows.reshape([-1, H_sp * W_sp, C])
+
+
def windows2img(img_splits_hw, H_sp, W_sp, H, W):
    """Inverse of img2windows.

    img_splits_hw: (B * H/H_sp * W/W_sp, H_sp * W_sp, C) window tensor.
    Returns the merged sequence of shape (B, H * W, C).
    """
    # Recover the batch size: the leading axis holds B * (H/H_sp) * (W/W_sp).
    B = int(img_splits_hw.shape[0] / (H * W / H_sp / W_sp))

    img = img_splits_hw.reshape([B, H // H_sp, W // W_sp, H_sp, W_sp, -1])
    # Undo the window permutation, then flatten axes 1..4 back into a single
    # row-major spatial axis of length H * W.
    img = img.transpose([0, 1, 3, 2, 4, 5]).flatten(1, 4)
    return img
+
+
class Block(nn.Layer):
    """Transformer block that attends along rows and columns separately.

    The qkv projection is split channel-wise into two groups: one attends
    within horizontal stripes of height ``split_h`` (full width), the other
    within vertical stripes of width ``split_w`` (full height). The two
    results are concatenated back on the channel axis, projected, and passed
    through a post-norm residual MLP.
    """

    def __init__(
        self,
        dim,
        num_heads,
        split_h=4,
        split_w=4,
        h_num_heads=None,
        w_num_heads=None,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        eps=1e-6,
    ):
        """
        @param dim: channel dimension of the input feature map
        @param num_heads: total attention heads, split between the H and W paths
        @param split_h: stripe height for the horizontal attention path
        @param split_w: stripe width for the vertical attention path
        @param h_num_heads / w_num_heads: per-path head counts; default is an
            even num_heads // 2 split for each
        """
        super().__init__()
        self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
        self.proj = nn.Linear(dim, dim)
        self.split_h = split_h
        self.split_w = split_w
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.norm1 = norm_layer(dim, epsilon=eps)
        self.h_num_heads = h_num_heads if h_num_heads is not None else num_heads // 2
        self.w_num_heads = w_num_heads if w_num_heads is not None else num_heads // 2
        self.head_dim = dim // num_heads
        # One shared mixer handles both stripe orientations; the window shape
        # is decided by how img2windows slices the input below.
        self.mixer = HWAttention(
            head_dim=dim // num_heads,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()
        self.norm2 = norm_layer(dim, epsilon=eps)
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )

    def forward(self, x):
        """x: (B, C, H, W) feature map -> (B, C, H, W), same shape."""
        B, C, H, W = x.shape
        x = x.flatten(2).transpose([0, 2, 1])

        qkv = self.qkv(x).reshape([B, H, W, 3 * C])

        # Channel split: first 3*ch channels feed the horizontal-stripe path,
        # the remainder the vertical-stripe path.
        x1 = qkv[:, :, :, : 3 * self.h_num_heads * self.head_dim]  # b, h, w, 3ch
        x2 = qkv[:, :, :, 3 * self.h_num_heads * self.head_dim :]  # b, h, w, 3cw

        x1 = self.mixer(img2windows(x1, self.split_h, W))  # b*splith, W, 3ch
        x2 = self.mixer(img2windows(x2, H, self.split_w))  # b*splitw, h, 3ch
        x1 = windows2img(x1, self.split_h, W, H, W)
        x2 = windows2img(x2, H, self.split_w, H, W)

        attened_x = paddle.concat([x1, x2], 2)
        attened_x = self.proj(attened_x)

        # Post-norm residual connections (norm applied after the add).
        x = self.norm1(x + self.drop_path(attened_x))
        x = self.norm2(x + self.drop_path(self.mlp(x)))
        x = x.transpose([0, 2, 1]).reshape([-1, C, H, W])
        return x
+
+
class SLAHead(nn.Layer):
    """SLANet table head: step-wise structure-token and box decoding."""

    def __init__(
        self,
        in_channels,
        hidden_size,
        out_channels=30,
        max_text_length=500,
        loc_reg_num=4,
        fc_decay=0.0,
        use_attn=False,
        **kwargs,
    ):
        """
        @param in_channels: input shape
        @param hidden_size: hidden_size for RNN and Embedding
        @param out_channels: num_classes to rec
        @param max_text_length: max text pred
        @param loc_reg_num: regression values per predicted box
        @param fc_decay: L2 decay for the generator linear layers
        @param use_attn: insert cross-attention Blocks over the feature map
        """
        super().__init__()
        in_channels = in_channels[-1]
        self.hidden_size = hidden_size
        self.max_text_length = max_text_length
        self.emb = self._char_to_onehot
        self.num_embeddings = out_channels
        self.loc_reg_num = loc_reg_num
        # The last class id is treated as the end-of-sequence token.
        self.eos = self.num_embeddings - 1

        # structure
        self.structure_attention_cell = AttentionGRUCell(
            in_channels, hidden_size, self.num_embeddings
        )
        weight_attr, bias_attr = get_para_bias_attr(l2_decay=fc_decay, k=hidden_size)
        # NOTE(review): weight_attr1_1 / bias_attr1_1 are created but never
        # used below -- kept for parity with the original implementation.
        weight_attr1_1, bias_attr1_1 = get_para_bias_attr(
            l2_decay=fc_decay, k=hidden_size
        )
        weight_attr1_2, bias_attr1_2 = get_para_bias_attr(
            l2_decay=fc_decay, k=hidden_size
        )
        self.structure_generator = nn.Sequential(
            nn.Linear(
                self.hidden_size,
                self.hidden_size,
                weight_attr=weight_attr1_2,
                bias_attr=bias_attr1_2,
            ),
            nn.Linear(
                hidden_size, out_channels, weight_attr=weight_attr, bias_attr=bias_attr
            ),
        )
        # Per-Block stochastic-depth rates, used only when use_attn is set.
        dpr = np.linspace(0, 0.1, 2)

        self.use_attn = use_attn
        if use_attn:
            layer_list = [
                Block(
                    in_channels,
                    num_heads=2,
                    mlp_ratio=4.0,
                    qkv_bias=True,
                    drop_path=dpr[i],
                )
                for i in range(2)
            ]
            self.cross_atten = nn.Sequential(*layer_list)
        # loc
        weight_attr1, bias_attr1 = get_para_bias_attr(
            l2_decay=fc_decay, k=self.hidden_size
        )
        weight_attr2, bias_attr2 = get_para_bias_attr(
            l2_decay=fc_decay, k=self.hidden_size
        )
        self.loc_generator = nn.Sequential(
            nn.Linear(
                self.hidden_size,
                self.hidden_size,
                weight_attr=weight_attr1,
                bias_attr=bias_attr1,
            ),
            nn.Linear(
                self.hidden_size,
                loc_reg_num,
                weight_attr=weight_attr2,
                bias_attr=bias_attr2,
            ),
            nn.Sigmoid(),
        )

    def forward(self, inputs, targets=None):
        """Teacher-forced decoding in training, greedy decoding otherwise.

        Returns a dict with 'structure_probs' (softmaxed only at inference)
        and 'loc_preds', both truncated to the realized decode length.
        """
        fea = inputs[-1]
        batch_size = fea.shape[0]
        if self.use_attn:
            # Residual cross-attention refinement of the feature map.
            fea = fea + self.cross_atten(fea)
        # reshape
        fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], -1])
        fea = fea.transpose([0, 2, 1])  # (NTC)(batch, width, channels)

        hidden = paddle.zeros((batch_size, self.hidden_size))
        structure_preds = paddle.zeros(
            (batch_size, self.max_text_length + 1, self.num_embeddings)
        )
        loc_preds = paddle.zeros(
            (batch_size, self.max_text_length + 1, self.loc_reg_num)
        )
        # Prediction buffers are filled in-place; no gradient flows through
        # the buffer tensors themselves.
        structure_preds.stop_gradient = True
        loc_preds.stop_gradient = True

        if self.training and targets is not None:
            structure = targets[0]
            # targets[-2] holds per-sample sequence lengths; decode only up to
            # the longest one in the batch.
            max_len = targets[-2].max()
            for i in range(max_len + 1):
                hidden, structure_step, loc_step = self._decode(
                    structure[:, i], fea, hidden
                )
                structure_preds[:, i, :] = structure_step
                loc_preds[:, i, :] = loc_step
            structure_preds = structure_preds[:, : max_len + 1]
            loc_preds = loc_preds[:, : max_len + 1]
        else:
            structure_ids = paddle.zeros(
                (batch_size, self.max_text_length + 1), dtype=paddle.int64
            )
            pre_chars = paddle.zeros(shape=[batch_size], dtype="int32")
            # Wrapped in a tensor so the loop bound is traceable for export.
            max_text_length = paddle.to_tensor(self.max_text_length)
            # for export
            loc_step, structure_step = None, None
            for i in range(max_text_length + 1):
                hidden, structure_step, loc_step = self._decode(pre_chars, fea, hidden)
                pre_chars = structure_step.argmax(axis=1, dtype="int32")
                structure_preds[:, i, :] = structure_step
                loc_preds[:, i, :] = loc_step

                structure_ids[:, i] = pre_chars
                # Stop once every sample in the batch has emitted EOS.
                if (structure_ids == self.eos).any(-1).all():
                    break
        if not self.training:
            # `i` is the last executed decode step from the loop above.
            structure_preds = F.softmax(structure_preds[:, : i + 1])
            loc_preds = loc_preds[:, : i + 1]
        return {"structure_probs": structure_preds, "loc_preds": loc_preds}

    def _decode(self, pre_chars, features, hidden):
        """
        Predict table label and coordinates for each step
        @param pre_chars: Table label in previous step
        @param features:
        @param hidden: hidden status in previous step
        @return: (new hidden state, structure logits, sigmoid box regression)
        """
        emb_feature = self.emb(pre_chars)
        # output shape is b * self.hidden_size
        (output, hidden), alpha = self.structure_attention_cell(
            hidden, features, emb_feature
        )

        # structure
        structure_step = self.structure_generator(output)
        # loc
        loc_step = self.loc_generator(output)
        return hidden, structure_step, loc_step

    def _char_to_onehot(self, input_char):
        # One-hot embedding of the previous token over all structure classes.
        input_ont_hot = F.one_hot(input_char, self.num_embeddings)
        return input_ont_hot
diff --git a/docling_ibm_models/slanet_1m/modeling/necks/__init__.py b/docling_ibm_models/slanet_1m/modeling/necks/__init__.py
new file mode 100644
index 0000000..ef501f0
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/necks/__init__.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ["build_neck"]
+
+
def build_neck(config):
    """Create a neck module from a config dict.

    Pops the "name" key from *config* and instantiates the matching class
    with the remaining entries as keyword arguments.

    @param config: dict containing a "name" key plus constructor kwargs
    @return: the instantiated neck module
    @raise AssertionError: if the name is not a supported neck
    """
    from .csp_pan import CSPPAN

    # Explicit name -> class mapping instead of eval() on a config string.
    support_dict = {
        "CSPPAN": CSPPAN,
    }

    module_name = config.pop("name")
    assert module_name in support_dict, Exception(
        "neck only support {}".format(list(support_dict))
    )

    return support_dict[module_name](**config)
diff --git a/docling_ibm_models/slanet_1m/modeling/necks/csp_pan.py b/docling_ibm_models/slanet_1m/modeling/necks/csp_pan.py
new file mode 100644
index 0000000..5e8464d
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/necks/csp_pan.py
@@ -0,0 +1,337 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The code is based on:
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release%2F2.3/ppdet/modeling/necks/csp_pan.py
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+__all__ = ["CSPPAN"]
+
+
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm + activation (`leaky_relu` or `hard_swish`)."""

    def __init__(
        self,
        in_channel=96,
        out_channel=96,
        kernel_size=3,
        stride=1,
        groups=1,
        act="leaky_relu",
    ):
        super(ConvBNLayer, self).__init__()
        self.act = act
        assert self.act in ["leaky_relu", "hard_swish"]
        # Same-padding convolution with Kaiming-uniform weights and no bias
        # (the following BatchNorm supplies the shift).
        self.conv = nn.Conv2D(
            in_channels=in_channel,
            out_channels=out_channel,
            kernel_size=kernel_size,
            groups=groups,
            padding=(kernel_size - 1) // 2,
            stride=stride,
            weight_attr=ParamAttr(initializer=nn.initializer.KaimingUniform()),
            bias_attr=False,
        )
        self.bn = nn.BatchNorm2D(out_channel)

    def forward(self, x):
        out = self.bn(self.conv(x))
        if self.act == "leaky_relu":
            out = F.leaky_relu(out)
        elif self.act == "hard_swish":
            out = F.hardswish(out)
        return out
+
+
class DPModule(nn.Layer):
    """Depthwise-separable convolution block.

    A depth-wise conv followed by a point-wise (1x1) conv, each followed by
    BatchNorm and the selected activation.

    Args:
        in_channel (int): input channels.
        out_channel (int): output channels.
        kernel_size (int): depth-wise conv kernel size.
        stride (int): depth-wise conv stride.
        act (str): activation name, `leaky_relu` or `hard_swish`.
    """

    def __init__(
        self, in_channel=96, out_channel=96, kernel_size=3, stride=1, act="leaky_relu"
    ):
        super(DPModule, self).__init__()
        kaiming = nn.initializer.KaimingUniform()
        self.act = act
        self.dwconv = nn.Conv2D(
            in_channels=in_channel,
            out_channels=out_channel,
            kernel_size=kernel_size,
            groups=out_channel,
            padding=(kernel_size - 1) // 2,
            stride=stride,
            weight_attr=ParamAttr(initializer=kaiming),
            bias_attr=False,
        )
        self.bn1 = nn.BatchNorm2D(out_channel)
        self.pwconv = nn.Conv2D(
            in_channels=out_channel,
            out_channels=out_channel,
            kernel_size=1,
            groups=1,
            padding=0,
            weight_attr=ParamAttr(initializer=kaiming),
            bias_attr=False,
        )
        self.bn2 = nn.BatchNorm2D(out_channel)

    def act_func(self, x):
        # Apply the configured activation; unrecognised names pass through.
        if self.act == "leaky_relu":
            x = F.leaky_relu(x)
        elif self.act == "hard_swish":
            x = F.hardswish(x)
        return x

    def forward(self, x):
        x = self.act_func(self.bn1(self.dwconv(x)))
        return self.act_func(self.bn2(self.pwconv(x)))
+
+
class DarknetBottleneck(nn.Layer):
    """Darknet-style residual bottleneck: a 1x1 conv then a kxk conv.

    The input is added back to the output when ``add_identity`` is set and
    the input/output channel counts match. The second conv may optionally be
    depthwise-separable.

    Args:
        in_channels (int): input channels.
        out_channels (int): output channels.
        kernel_size (int): second conv kernel size.
        expansion (float): hidden-channel ratio, default 0.5.
        add_identity (bool): whether to add the residual connection.
        use_depthwise (bool): use DPModule for the second conv.
        act (str): activation name.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=3,
        expansion=0.5,
        add_identity=True,
        use_depthwise=False,
        act="leaky_relu",
    ):
        super(DarknetBottleneck, self).__init__()
        hidden_channels = int(out_channels * expansion)
        conv_func = DPModule if use_depthwise else ConvBNLayer
        self.conv1 = ConvBNLayer(
            in_channel=in_channels, out_channel=hidden_channels, kernel_size=1, act=act
        )
        self.conv2 = conv_func(
            in_channel=hidden_channels,
            out_channel=out_channels,
            kernel_size=kernel_size,
            stride=1,
            act=act,
        )
        # Residual only makes sense when shapes line up.
        self.add_identity = add_identity and in_channels == out_channels

    def forward(self, x):
        out = self.conv2(self.conv1(x))
        return out + x if self.add_identity else out
+
+
class CSPLayer(nn.Layer):
    """Cross Stage Partial layer.

    The input is routed through a `main` branch (1x1 conv followed by a stack
    of DarknetBottleneck blocks) and a `short` branch (1x1 conv only); the
    two halves are concatenated and fused by a final 1x1 conv.

    Args:
        in_channels (int): input channels.
        out_channels (int): output channels.
        kernel_size (int): bottleneck conv kernel size.
        expand_ratio (float): hidden-channel ratio, default 0.5.
        num_blocks (int): number of bottleneck blocks, default 1.
        add_identity (bool): residual connections inside the bottlenecks.
        use_depthwise (bool): depthwise-separable convs in the bottlenecks.
        act (str): activation name.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=3,
        expand_ratio=0.5,
        num_blocks=1,
        add_identity=True,
        use_depthwise=False,
        act="leaky_relu",
    ):
        super().__init__()
        mid_channels = int(out_channels * expand_ratio)
        self.main_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
        self.short_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
        self.final_conv = ConvBNLayer(2 * mid_channels, out_channels, 1, act=act)

        bottlenecks = [
            DarknetBottleneck(
                mid_channels,
                mid_channels,
                kernel_size,
                1.0,
                add_identity,
                use_depthwise,
                act=act,
            )
            for _ in range(num_blocks)
        ]
        self.blocks = nn.Sequential(*bottlenecks)

    def forward(self, x):
        short_out = self.short_conv(x)
        main_out = self.blocks(self.main_conv(x))
        return self.final_conv(paddle.concat((main_out, short_out), axis=1))
+
+
class Channel_T(nn.Layer):
    """Project every input scale to a common channel count with 1x1 convs."""

    def __init__(self, in_channels=[116, 232, 464], out_channels=96, act="leaky_relu"):
        super(Channel_T, self).__init__()
        # One 1x1 ConvBNLayer per input scale.
        self.convs = nn.LayerList(
            [ConvBNLayer(c, out_channels, 1, act=act) for c in in_channels]
        )

    def forward(self, x):
        outs = []
        for i, feat in enumerate(x):
            outs.append(self.convs[i](feat))
        return outs
+
+
class CSPPAN(nn.Layer):
    """Path Aggregation Network with CSP module.
    Args:
        in_channels (List[int]): Number of input channels per scale.
        out_channels (int): Number of output channels (used at each scale)
        kernel_size (int): The conv2d kernel size of this Module.
        num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
        use_depthwise (bool): Whether to depthwise separable convolution in
            blocks. Default: True
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=5,
        num_csp_blocks=1,
        use_depthwise=True,
        act="hard_swish",
    ):
        super(CSPPAN, self).__init__()
        self.in_channels = in_channels
        # Every output scale carries the same channel count.
        self.out_channels = [out_channels] * len(in_channels)
        conv_func = DPModule if use_depthwise else ConvBNLayer

        # Unify per-scale channel counts before fusion.
        self.conv_t = Channel_T(in_channels, out_channels, act=act)

        # build top-down blocks
        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
        self.top_down_blocks = nn.LayerList()
        for idx in range(len(in_channels) - 1, 0, -1):
            self.top_down_blocks.append(
                CSPLayer(
                    out_channels * 2,
                    out_channels,
                    kernel_size=kernel_size,
                    num_blocks=num_csp_blocks,
                    add_identity=False,
                    use_depthwise=use_depthwise,
                    act=act,
                )
            )

        # build bottom-up blocks
        self.downsamples = nn.LayerList()
        self.bottom_up_blocks = nn.LayerList()
        for idx in range(len(in_channels) - 1):
            # Stride-2 conv halves the spatial size on the way back up.
            self.downsamples.append(
                conv_func(
                    out_channels,
                    out_channels,
                    kernel_size=kernel_size,
                    stride=2,
                    act=act,
                )
            )
            self.bottom_up_blocks.append(
                CSPLayer(
                    out_channels * 2,
                    out_channels,
                    kernel_size=kernel_size,
                    num_blocks=num_csp_blocks,
                    add_identity=False,
                    use_depthwise=use_depthwise,
                    act=act,
                )
            )

    def forward(self, inputs):
        """
        Args:
            inputs (tuple[Tensor]): input features.
        Returns:
            tuple[Tensor]: CSPPAN features.
        """
        assert len(inputs) == len(self.in_channels)
        inputs = self.conv_t(inputs)

        # top-down path: upsample coarser features and fuse with finer ones.
        inner_outs = [inputs[-1]]
        for idx in range(len(self.in_channels) - 1, 0, -1):
            feat_heigh = inner_outs[0]
            feat_low = inputs[idx - 1]
            # Upsample to the exact lower-level size (handles odd shapes).
            upsample_feat = F.upsample(
                feat_heigh, size=feat_low.shape[2:4], mode="nearest"
            )

            inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
                paddle.concat([upsample_feat, feat_low], 1)
            )
            inner_outs.insert(0, inner_out)

        # bottom-up path: downsample finer features and fuse with coarser ones.
        outs = [inner_outs[0]]
        for idx in range(len(self.in_channels) - 1):
            feat_low = outs[-1]
            feat_height = inner_outs[idx + 1]
            downsample_feat = self.downsamples[idx](feat_low)
            out = self.bottom_up_blocks[idx](
                paddle.concat([downsample_feat, feat_height], 1)
            )
            outs.append(out)

        return tuple(outs)
diff --git a/docling_ibm_models/slanet_1m/modeling/necks/rnn.py b/docling_ibm_models/slanet_1m/modeling/necks/rnn.py
new file mode 100644
index 0000000..7f50319
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/necks/rnn.py
@@ -0,0 +1,284 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+
+from modeling.heads.rec_ctc_head import get_para_bias_attr
+from modeling.backbones.rec_svtrnet import (
+ Block,
+ ConvBNLayer,
+ trunc_normal_,
+ zeros_,
+ ones_,
+)
+
+
class Im2Seq(nn.Layer):
    """Collapse a (B, C, 1, W) feature map into a (B, W, C) sequence."""

    def __init__(self, in_channels, **kwargs):
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        B, C, H, W = x.shape
        # The backbone must have pooled the height down to a single row.
        assert H == 1
        seq = x.squeeze(axis=2)
        return seq.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
+
+
class EncoderWithRNN(nn.Layer):
    """Two-layer bidirectional LSTM sequence encoder."""

    def __init__(self, in_channels, hidden_size):
        super(EncoderWithRNN, self).__init__()
        # Both directions are concatenated, doubling the channel count.
        self.out_channels = hidden_size * 2
        self.lstm = nn.LSTM(
            in_channels, hidden_size, direction="bidirectional", num_layers=2
        )

    def forward(self, x):
        encoded, _ = self.lstm(x)
        return encoded
+
+
class BidirectionalLSTM(nn.Layer):
    """LSTM wrapper with an optional trailing linear projection."""

    def __init__(
        self,
        input_size,
        hidden_size,
        output_size=None,
        num_layers=1,
        dropout=0,
        direction=False,
        time_major=False,
        with_linear=False,
    ):
        """
        @param input_size: feature size of the input sequence
        @param hidden_size: LSTM hidden size
        @param output_size: linear projection size (only used with with_linear)
        @param direction: passed straight to paddle nn.LSTM
        @param with_linear: append a Linear(hidden*2 -> output_size) layer
        """
        # NOTE(review): the default direction=False is not a valid paddle
        # direction string ("forward"/"bidirectional"); visible callers
        # (EncoderWithCascadeRNN) always pass "bidirectional" -- confirm the
        # default is never relied upon.
        super(BidirectionalLSTM, self).__init__()
        self.with_linear = with_linear
        self.rnn = nn.LSTM(
            input_size,
            hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            direction=direction,
            time_major=time_major,
        )

        # text recognition the specified structure LSTM with linear
        # (the *2 assumes a bidirectional RNN output).
        if self.with_linear:
            self.linear = nn.Linear(hidden_size * 2, output_size)

    def forward(self, input_feature):
        recurrent, _ = self.rnn(
            input_feature
        )  # batch_size x T x input_size -> batch_size x T x (2*hidden_size)
        if self.with_linear:
            output = self.linear(recurrent)  # batch_size x T x output_size
            return output
        return recurrent
+
+
class EncoderWithCascadeRNN(nn.Layer):
    """Cascade of bidirectional LSTM layers applied one after another."""

    def __init__(
        self, in_channels, hidden_size, out_channels, num_layers=2, with_linear=False
    ):
        """
        @param in_channels: feature size of the input sequence
        @param hidden_size: LSTM hidden size shared by every layer
        @param out_channels: list of per-layer output sizes
        @param num_layers: number of cascaded BiLSTM layers
        @param with_linear: add a linear projection after each LSTM
        """
        super(EncoderWithCascadeRNN, self).__init__()
        self.out_channels = out_channels[-1]
        self.encoder = nn.LayerList(
            [
                BidirectionalLSTM(
                    in_channels if i == 0 else out_channels[i - 1],
                    hidden_size,
                    output_size=out_channels[i],
                    num_layers=1,
                    direction="bidirectional",
                    with_linear=with_linear,
                )
                for i in range(num_layers)
            ]
        )

    def forward(self, x):
        # Feed the sequence through each BiLSTM in turn (the previous
        # enumerate index was unused).
        for layer in self.encoder:
            x = layer(x)
        return x
+
+
class EncoderWithFC(nn.Layer):
    """Single fully-connected layer used as a lightweight sequence encoder."""

    def __init__(self, in_channels, hidden_size):
        super(EncoderWithFC, self).__init__()
        self.out_channels = hidden_size
        # Regularized, uniformly-initialized parameters for the projection.
        weight_attr, bias_attr = get_para_bias_attr(l2_decay=0.00001, k=in_channels)
        self.fc = nn.Linear(
            in_channels,
            hidden_size,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            name="reduce_encoder_fea",
        )

    def forward(self, x):
        return self.fc(x)
+
+
class EncoderWithSVTR(nn.Layer):
    """SVTR-style mixed conv/transformer sequence encoder.

    Reduces channels with convs, runs global-mixing transformer blocks, then
    fuses the result with the (optionally gradient-detached) input via a
    shortcut concat and final 1x1 projection to ``dims`` channels.
    """

    def __init__(
        self,
        in_channels,
        dims=64,  # XS
        depth=2,
        hidden_dims=120,
        use_guide=False,
        num_heads=8,
        qkv_bias=True,
        mlp_ratio=2.0,
        drop_rate=0.1,
        attn_drop_rate=0.1,
        drop_path=0.0,
        kernel_size=[3, 3],
        qk_scale=None,
    ):
        """
        @param in_channels: channels of the incoming feature map
        @param dims: output channels of the encoder
        @param depth: number of SVTR transformer blocks
        @param hidden_dims: channel width inside the transformer blocks
        @param use_guide: detach the transformer input from the backbone grads
        """
        super(EncoderWithSVTR, self).__init__()
        self.depth = depth
        self.use_guide = use_guide
        self.conv1 = ConvBNLayer(
            in_channels,
            in_channels // 8,
            kernel_size=kernel_size,
            padding=[kernel_size[0] // 2, kernel_size[1] // 2],
            act=nn.Swish,
        )
        self.conv2 = ConvBNLayer(
            in_channels // 8, hidden_dims, kernel_size=1, act=nn.Swish
        )

        self.svtr_block = nn.LayerList(
            [
                Block(
                    dim=hidden_dims,
                    num_heads=num_heads,
                    mixer="Global",
                    HW=None,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=drop_rate,
                    act_layer=nn.Swish,
                    attn_drop=attn_drop_rate,
                    drop_path=drop_path,
                    norm_layer="nn.LayerNorm",
                    epsilon=1e-05,
                    prenorm=False,
                )
                for i in range(depth)
            ]
        )
        self.norm = nn.LayerNorm(hidden_dims, epsilon=1e-6)
        self.conv3 = ConvBNLayer(hidden_dims, in_channels, kernel_size=1, act=nn.Swish)
        # last conv-nxn, the input is concat of input tensor and conv3 output tensor
        self.conv4 = ConvBNLayer(
            2 * in_channels,
            in_channels // 8,
            kernel_size=kernel_size,
            padding=[kernel_size[0] // 2, kernel_size[1] // 2],
            act=nn.Swish,
        )

        self.conv1x1 = ConvBNLayer(in_channels // 8, dims, kernel_size=1, act=nn.Swish)
        self.out_channels = dims
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Truncated-normal linear weights; zero bias; identity LayerNorm.
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
            if isinstance(m, nn.Linear) and m.bias is not None:
                zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward(self, x):
        # for use guide
        if self.use_guide:
            # Detach so the transformer branch does not backprop into the CNN.
            z = x.clone()
            z.stop_gradient = True
        else:
            z = x
        # for short cut
        h = z
        # reduce dim
        z = self.conv1(z)
        z = self.conv2(z)
        # SVTR global block
        B, C, H, W = z.shape
        z = z.flatten(2).transpose([0, 2, 1])
        for blk in self.svtr_block:
            z = blk(z)
        z = self.norm(z)
        # last stage: leading 0 keeps the batch dim in paddle's reshape.
        z = z.reshape([0, H, W, C]).transpose([0, 3, 1, 2])
        z = self.conv3(z)
        z = paddle.concat((h, z), axis=1)
        z = self.conv1x1(self.conv4(z))
        return z
+
+
class SequenceEncoder(nn.Layer):
    """Dispatching sequence encoder.

    Converts a conv feature map into a sequence (Im2Seq) and optionally runs
    one of several encoders over it, selected by ``encoder_type``:
    'reshape', 'fc', 'rnn', 'svtr' or 'cascadernn'. The 'svtr' encoder works
    on the 2-D map directly, so the reshape happens after it instead.
    """

    def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs):
        """
        @param in_channels: channels of the incoming feature map
        @param encoder_type: one of the supported encoder names above
        @param hidden_size: hidden size for the fc/rnn/cascadernn encoders
        """
        super(SequenceEncoder, self).__init__()
        self.encoder_reshape = Im2Seq(in_channels)
        self.out_channels = self.encoder_reshape.out_channels
        self.encoder_type = encoder_type
        if encoder_type == "reshape":
            self.only_reshape = True
        else:
            support_encoder_dict = {
                "reshape": Im2Seq,
                "fc": EncoderWithFC,
                "rnn": EncoderWithRNN,
                "svtr": EncoderWithSVTR,
                "cascadernn": EncoderWithCascadeRNN,
            }
            assert encoder_type in support_encoder_dict, "{} must in {}".format(
                encoder_type, support_encoder_dict.keys()
            )
            # Constructor signatures differ per encoder type.
            if encoder_type == "svtr":
                self.encoder = support_encoder_dict[encoder_type](
                    self.encoder_reshape.out_channels, **kwargs
                )
            elif encoder_type == "cascadernn":
                self.encoder = support_encoder_dict[encoder_type](
                    self.encoder_reshape.out_channels, hidden_size, **kwargs
                )
            else:
                self.encoder = support_encoder_dict[encoder_type](
                    self.encoder_reshape.out_channels, hidden_size
                )
            self.out_channels = self.encoder.out_channels
            self.only_reshape = False

    def forward(self, x):
        if self.encoder_type != "svtr":
            # Sequence-first: flatten the map, then (optionally) encode.
            x = self.encoder_reshape(x)
            if not self.only_reshape:
                x = self.encoder(x)
            return x
        else:
            # SVTR consumes the 2-D feature map, so reshape afterwards.
            x = self.encoder(x)
            x = self.encoder_reshape(x)
            return x
diff --git a/docling_ibm_models/slanet_1m/optimizer/__init__.py b/docling_ibm_models/slanet_1m/optimizer/__init__.py
new file mode 100644
index 0000000..a191a4b
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/optimizer/__init__.py
@@ -0,0 +1,66 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import copy
+import paddle
+
+__all__ = ["build_optimizer"]
+
+
def build_lr_scheduler(lr_config, epochs, step_each_epoch):
    """Instantiate the learning-rate schedule named in *lr_config*.

    The total epoch count and steps-per-epoch are injected into the config,
    then the named factory class from the local ``learning_rate`` module
    (default ``Const``) is constructed and called to build the scheduler.
    """
    from . import learning_rate

    lr_config.update({"epochs": epochs, "step_each_epoch": step_each_epoch})
    scheduler_name = lr_config.pop("name", "Const")
    scheduler_factory = getattr(learning_rate, scheduler_name)(**lr_config)
    return scheduler_factory()
+
+
def build_optimizer(config, epochs, step_each_epoch, model):
    """Build a paddle optimizer and its lr scheduler from a config dict.

    @param config: optimizer section of the config (copied, not mutated)
    @param epochs: total training epochs (forwarded to the lr schedule)
    @param step_each_epoch: steps per epoch (forwarded to the lr schedule)
    @param model: model whose parameters the optimizer will manage
    @return: (optimizer instance, lr scheduler)
    """
    from . import regularizer, optimizer

    config = copy.deepcopy(config)
    # step1 build lr
    lr = build_lr_scheduler(config.pop("lr"), epochs, step_each_epoch)

    # step2 build regularization
    if "regularizer" in config and config["regularizer"] is not None:
        reg_config = config.pop("regularizer")
        reg_name = reg_config.pop("name")
        # Allow short names like "L2" by falling back to e.g. "L2Decay".
        if not hasattr(regularizer, reg_name):
            reg_name += "Decay"
        reg = getattr(regularizer, reg_name)(**reg_config)()
    elif "weight_decay" in config:
        # Plain float weight decay passed straight to the optimizer.
        reg = config.pop("weight_decay")
    else:
        reg = None

    # step3 build optimizer
    optim_name = config.pop("name")
    # Per-tensor norm clipping takes precedence over global-norm clipping.
    if "clip_norm" in config:
        clip_norm = config.pop("clip_norm")
        grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
    elif "clip_norm_global" in config:
        clip_norm = config.pop("clip_norm_global")
        grad_clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=clip_norm)
    else:
        grad_clip = None
    optim = getattr(optimizer, optim_name)(
        learning_rate=lr, weight_decay=reg, grad_clip=grad_clip, **config
    )
    # The factory object is called with the model to bind its parameters.
    return optim(model), lr
diff --git a/docling_ibm_models/slanet_1m/optimizer/learning_rate.py b/docling_ibm_models/slanet_1m/optimizer/learning_rate.py
new file mode 100644
index 0000000..687a145
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/optimizer/learning_rate.py
@@ -0,0 +1,454 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle.optimizer import lr
+from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay, TwoStepCosineDecay
+
+
class Linear(object):
    """
    Polynomial (linear by default) learning rate decay with optional warmup.
    Args:
        learning_rate (float): initial learning rate.
        epochs (int): total number of training epochs.
        step_each_epoch (int): optimizer steps per epoch.
        end_lr (float, optional): final learning rate. Default: 0.0.
        power (float, optional): power of the polynomial. Default: 1.0 (linear).
        warmup_epoch (int, optional): epochs of linear warmup. Default: 0.
        last_epoch (int, optional): index of last epoch, used to restart
            training. Default: -1, means initial learning rate.
    """

    def __init__(
        self,
        learning_rate,
        epochs,
        step_each_epoch,
        end_lr=0.0,
        power=1.0,
        warmup_epoch=0,
        last_epoch=-1,
        **kwargs,
    ):
        super(Linear, self).__init__()
        self.learning_rate = learning_rate
        # Decay horizon expressed in optimizer steps, not epochs.
        self.epochs = epochs * step_each_epoch
        self.end_lr = end_lr
        self.power = power
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        schedule = lr.PolynomialDecay(
            learning_rate=self.learning_rate,
            decay_steps=self.epochs,
            end_lr=self.end_lr,
            power=self.power,
            last_epoch=self.last_epoch,
        )
        if not self.warmup_epoch:
            return schedule
        # Ramp linearly from 0 to the base rate before decaying.
        return lr.LinearWarmup(
            learning_rate=schedule,
            warmup_steps=self.warmup_epoch,
            start_lr=0.0,
            end_lr=self.learning_rate,
            last_epoch=self.last_epoch,
        )
+
+
class Cosine(object):
    """
    Cosine annealing learning rate decay with optional linear warmup.
    lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
    Args:
        learning_rate (float): initial learning rate.
        step_each_epoch (int): optimizer steps per epoch.
        epochs (int): total training epochs.
        warmup_epoch (int, optional): epochs of linear warmup. Default: 0.
        last_epoch (int, optional): index of last epoch, used to restart
            training. Default: -1, means initial learning rate.
    """

    def __init__(
        self,
        learning_rate,
        step_each_epoch,
        epochs,
        warmup_epoch=0,
        last_epoch=-1,
        **kwargs,
    ):
        super(Cosine, self).__init__()
        self.learning_rate = learning_rate
        # Annealing period in optimizer steps.
        self.T_max = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        schedule = lr.CosineAnnealingDecay(
            learning_rate=self.learning_rate,
            T_max=self.T_max,
            last_epoch=self.last_epoch,
        )
        if not self.warmup_epoch:
            return schedule
        return lr.LinearWarmup(
            learning_rate=schedule,
            warmup_steps=self.warmup_epoch,
            start_lr=0.0,
            end_lr=self.learning_rate,
            last_epoch=self.last_epoch,
        )
+
+
class Step(object):
    """
    Step learning rate decay: multiply the rate by ``gamma`` every
    ``step_size`` epochs, with optional linear warmup.
    Args:
        learning_rate (float): initial learning rate.
        step_size (int): decay interval in epochs.
        step_each_epoch (int): optimizer steps per epoch.
        gamma (float): decay ratio, ``new_lr = origin_lr * gamma``;
            should be less than 1.0.
        warmup_epoch (int, optional): epochs of linear warmup. Default: 0.
        last_epoch (int, optional): index of last epoch, used to restart
            training. Default: -1, means initial learning rate.
    """

    def __init__(
        self,
        learning_rate,
        step_size,
        step_each_epoch,
        gamma,
        warmup_epoch=0,
        last_epoch=-1,
        **kwargs,
    ):
        super(Step, self).__init__()
        # Decay interval converted from epochs to optimizer steps.
        self.step_size = step_each_epoch * step_size
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        schedule = lr.StepDecay(
            learning_rate=self.learning_rate,
            step_size=self.step_size,
            gamma=self.gamma,
            last_epoch=self.last_epoch,
        )
        if not self.warmup_epoch:
            return schedule
        return lr.LinearWarmup(
            learning_rate=schedule,
            warmup_steps=self.warmup_epoch,
            start_lr=0.0,
            end_lr=self.learning_rate,
            last_epoch=self.last_epoch,
        )
+
+
class Piecewise(object):
    """
    Piecewise-constant learning rate decay with optional linear warmup.
    Args:
        step_each_epoch (int): optimizer steps per epoch.
        decay_epochs (list): epoch indices at which the rate switches.
        values (list): learning rate values picked between boundaries;
            must have one more element than ``decay_epochs``.
        warmup_epoch (int, optional): epochs of linear warmup. Default: 0.
        last_epoch (int, optional): index of last epoch, used to restart
            training. Default: -1, means initial learning rate.
    """

    def __init__(
        self,
        step_each_epoch,
        decay_epochs,
        values,
        warmup_epoch=0,
        last_epoch=-1,
        **kwargs,
    ):
        super(Piecewise, self).__init__()
        # Epoch boundaries converted to global step boundaries.
        self.boundaries = [step_each_epoch * epoch for epoch in decay_epochs]
        self.values = values
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        schedule = lr.PiecewiseDecay(
            boundaries=self.boundaries, values=self.values, last_epoch=self.last_epoch
        )
        if not self.warmup_epoch:
            return schedule
        # Warm up towards the first piecewise value.
        return lr.LinearWarmup(
            learning_rate=schedule,
            warmup_steps=self.warmup_epoch,
            start_lr=0.0,
            end_lr=self.values[0],
            last_epoch=self.last_epoch,
        )
+
+
class CyclicalCosine(object):
    """
    Cyclical cosine learning rate decay with optional linear warmup.
    Args:
        learning_rate (float): initial learning rate.
        step_each_epoch (int): optimizer steps per epoch.
        epochs (int): total training epochs.
        cycle (int): period of the cosine schedule, in epochs.
        warmup_epoch (int, optional): epochs of linear warmup. Default: 0.
        last_epoch (int, optional): index of last epoch, used to restart
            training. Default: -1, means initial learning rate.
    """

    def __init__(
        self,
        learning_rate,
        step_each_epoch,
        epochs,
        cycle,
        warmup_epoch=0,
        last_epoch=-1,
        **kwargs,
    ):
        super(CyclicalCosine, self).__init__()
        self.learning_rate = learning_rate
        self.T_max = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
        # Cycle length converted from epochs to optimizer steps.
        self.cycle = round(cycle * step_each_epoch)

    def __call__(self):
        schedule = CyclicalCosineDecay(
            learning_rate=self.learning_rate,
            T_max=self.T_max,
            cycle=self.cycle,
            last_epoch=self.last_epoch,
        )
        if not self.warmup_epoch:
            return schedule
        return lr.LinearWarmup(
            learning_rate=schedule,
            warmup_steps=self.warmup_epoch,
            start_lr=0.0,
            end_lr=self.learning_rate,
            last_epoch=self.last_epoch,
        )
+
+
class OneCycle(object):
    """
    One Cycle learning rate policy (https://arxiv.org/abs/1708.07120),
    with optional linear warmup wrapped around it.
    Args:
        max_lr (float): upper learning rate boundary of the cycle.
        epochs (int): total training epochs.
        step_each_epoch (int): optimizer steps per epoch.
        anneal_strategy (str): "cos" or "linear" annealing. Default: "cos".
        three_phase (bool): if True, add a third phase that annihilates the
            rate according to ``final_div_factor`` instead of modifying the
            second phase. Default: False.
        warmup_epoch (int, optional): epochs of linear warmup. Default: 0.
        last_epoch (int, optional): index of last epoch, used to restart
            training. Default: -1, means initial learning rate.
    """

    def __init__(
        self,
        max_lr,
        epochs,
        step_each_epoch,
        anneal_strategy="cos",
        three_phase=False,
        warmup_epoch=0,
        last_epoch=-1,
        **kwargs,
    ):
        super(OneCycle, self).__init__()
        self.max_lr = max_lr
        self.epochs = epochs
        self.steps_per_epoch = step_each_epoch
        self.anneal_strategy = anneal_strategy
        self.three_phase = three_phase
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        schedule = OneCycleDecay(
            max_lr=self.max_lr,
            epochs=self.epochs,
            steps_per_epoch=self.steps_per_epoch,
            anneal_strategy=self.anneal_strategy,
            three_phase=self.three_phase,
            last_epoch=self.last_epoch,
        )
        if not self.warmup_epoch:
            return schedule
        # Warm up towards the cycle's maximum rate.
        return lr.LinearWarmup(
            learning_rate=schedule,
            warmup_steps=self.warmup_epoch,
            start_lr=0.0,
            end_lr=self.max_lr,
            last_epoch=self.last_epoch,
        )
+
+
class Const(object):
    """
    Constant learning rate, with optional linear warmup.
    Args:
        learning_rate (float): initial learning rate.
        step_each_epoch (int): optimizer steps per epoch.
        warmup_epoch (int, optional): epochs of linear warmup. Default: 0.
        last_epoch (int, optional): index of last epoch, used to restart
            training. Default: -1, means initial learning rate.
    """

    def __init__(
        self, learning_rate, step_each_epoch, warmup_epoch=0, last_epoch=-1, **kwargs
    ):
        super(Const, self).__init__()
        self.learning_rate = learning_rate
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        if not self.warmup_epoch:
            # No warmup: paddle optimizers accept a bare float as a
            # constant learning rate.
            return self.learning_rate
        return lr.LinearWarmup(
            learning_rate=self.learning_rate,
            warmup_steps=self.warmup_epoch,
            start_lr=0.0,
            end_lr=self.learning_rate,
            last_epoch=self.last_epoch,
        )
+
+
class DecayLearningRate(object):
    """
    Polynomial learning rate decay:
    new_lr = (lr - end_lr) * (1 - epoch/decay_steps)**power + end_lr
    Args:
        learning_rate(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
        factor(float): Power of polynomial, should be greater than 0.0 to get
            learning rate decay. Default: 0.9
        end_lr(float): The minimum final learning rate. Default: 0.
    """

    def __init__(
        self, learning_rate, step_each_epoch, epochs, factor=0.9, end_lr=0, **kwargs
    ):
        super(DecayLearningRate, self).__init__()
        self.learning_rate = learning_rate
        self.epochs = epochs + 1
        self.factor = factor
        # BUG FIX: `end_lr` was previously ignored (hard-coded to 0), so a
        # configured minimum learning rate never took effect. The default
        # of 0 preserves the old behavior when the argument is omitted.
        self.end_lr = end_lr
        self.decay_steps = step_each_epoch * epochs

    def __call__(self):
        learning_rate = lr.PolynomialDecay(
            learning_rate=self.learning_rate,
            decay_steps=self.decay_steps,
            power=self.factor,
            end_lr=self.end_lr,
        )
        return learning_rate
+
+
class MultiStepDecay(object):
    """
    Multi-step learning rate decay: the learning rate is multiplied by
    ``gamma`` each time a milestone epoch is reached.
    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        milestones (list): increasing epoch indices at which the learning rate is decayed.
        step_each_epoch (int): steps each epoch.
        gamma (float): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
            It should be less than 1.0.
        warmup_epoch (int, optional): epochs of linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(
        self,
        learning_rate,
        milestones,
        step_each_epoch,
        gamma,
        warmup_epoch=0,
        last_epoch=-1,
        **kwargs,
    ):
        super(MultiStepDecay, self).__init__()
        # Milestones are given in epochs; convert to global step indices.
        self.milestones = [step_each_epoch * e for e in milestones]
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.MultiStepDecay(
            learning_rate=self.learning_rate,
            milestones=self.milestones,
            gamma=self.gamma,
            last_epoch=self.last_epoch,
        )
        if self.warmup_epoch > 0:
            # Wrap with a linear ramp from 0 up to the base learning rate.
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch,
            )
        return learning_rate
+
+
class TwoStepCosine(object):
    """
    Two-step cosine learning rate decay with optional linear warmup.

    The underlying ``TwoStepCosineDecay`` anneals with period ``T_max1``
    (the first phase) and then with the full training horizon ``T_max2``.
    Args:
        learning_rate(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
        warmup_epoch(int, optional): epochs of linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
        phase1_epochs(int, optional): length of the first cosine phase, in
            epochs. Default: 200 (this value used to be hard-coded).
    """

    def __init__(
        self,
        learning_rate,
        step_each_epoch,
        epochs,
        warmup_epoch=0,
        last_epoch=-1,
        phase1_epochs=200,
        **kwargs,
    ):
        super(TwoStepCosine, self).__init__()
        self.learning_rate = learning_rate
        # Generalized: the first-phase horizon was hard-coded to 200 epochs;
        # it is now configurable with the same default, so existing configs
        # behave identically.
        self.T_max1 = step_each_epoch * phase1_epochs
        self.T_max2 = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = TwoStepCosineDecay(
            learning_rate=self.learning_rate,
            T_max1=self.T_max1,
            T_max2=self.T_max2,
            last_epoch=self.last_epoch,
        )
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch,
            )
        return learning_rate
diff --git a/docling_ibm_models/slanet_1m/optimizer/lr_scheduler.py b/docling_ibm_models/slanet_1m/optimizer/lr_scheduler.py
new file mode 100644
index 0000000..4034e14
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/optimizer/lr_scheduler.py
@@ -0,0 +1,240 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from paddle.optimizer.lr import LRScheduler
+
+
class CyclicalCosineDecay(LRScheduler):
    def __init__(
        self, learning_rate, T_max, cycle=1, last_epoch=-1, eta_min=0.0, verbose=False
    ):
        """
        Cyclical cosine learning rate decay
        A learning rate which can be referred in https://arxiv.org/pdf/2012.12645.pdf
        Args:
            learning_rate(float): learning rate
            T_max(int): maximum epoch num
            cycle(int): period of the cosine decay
            last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
            eta_min(float): minimum learning rate during training
            verbose(bool): whether to print learning rate for each epoch
        """
        super(CyclicalCosineDecay, self).__init__(learning_rate, last_epoch, verbose)
        self.cycle = cycle
        self.eta_min = eta_min

    def get_lr(self):
        if self.last_epoch == 0:
            return self.base_lr
        # Position within the current cosine cycle; the schedule restarts
        # from base_lr every `cycle` steps.
        relative_epoch = self.last_epoch % self.cycle
        cosine_term = 1 + math.cos(math.pi * relative_epoch / self.cycle)
        return self.eta_min + 0.5 * (self.base_lr - self.eta_min) * cosine_term
+
+
class OneCycleDecay(LRScheduler):
    """
    One Cycle learning rate decay
    A learning rate which can be referred in https://arxiv.org/abs/1708.07120
    Code refered in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR

    The rate ramps from ``max_lr / div_factor`` up to ``max_lr`` over the
    first ``pct_start`` fraction of training, then anneals down to
    ``max_lr / div_factor / final_div_factor``.
    """

    def __init__(
        self,
        max_lr,
        epochs=None,
        steps_per_epoch=None,
        pct_start=0.3,
        anneal_strategy="cos",
        div_factor=25.0,
        final_div_factor=1e4,
        three_phase=False,
        last_epoch=-1,
        verbose=False,
    ):
        # Validate total_steps
        if epochs <= 0 or not isinstance(epochs, int):
            raise ValueError(
                "Expected positive integer epochs, but got {}".format(epochs)
            )
        if steps_per_epoch <= 0 or not isinstance(steps_per_epoch, int):
            raise ValueError(
                "Expected positive integer steps_per_epoch, but got {}".format(
                    steps_per_epoch
                )
            )
        self.total_steps = epochs * steps_per_epoch

        # Boundary learning rates of the cycle.
        self.max_lr = max_lr
        self.initial_lr = self.max_lr / div_factor
        self.min_lr = self.initial_lr / final_div_factor

        # Each phase anneals from start_lr to end_lr, finishing at the
        # (0-based) step index "end_step".
        if three_phase:
            self._schedule_phases = [
                {
                    "end_step": float(pct_start * self.total_steps) - 1,
                    "start_lr": self.initial_lr,
                    "end_lr": self.max_lr,
                },
                {
                    "end_step": float(2 * pct_start * self.total_steps) - 2,
                    "start_lr": self.max_lr,
                    "end_lr": self.initial_lr,
                },
                {
                    "end_step": self.total_steps - 1,
                    "start_lr": self.initial_lr,
                    "end_lr": self.min_lr,
                },
            ]
        else:
            self._schedule_phases = [
                {
                    "end_step": float(pct_start * self.total_steps) - 1,
                    "start_lr": self.initial_lr,
                    "end_lr": self.max_lr,
                },
                {
                    "end_step": self.total_steps - 1,
                    "start_lr": self.max_lr,
                    "end_lr": self.min_lr,
                },
            ]

        # Validate pct_start
        # NOTE(review): this check runs after the phases above are built,
        # but an invalid pct_start still raises before any step is taken.
        if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float):
            raise ValueError(
                "Expected float between 0 and 1 pct_start, but got {}".format(pct_start)
            )

        # Validate anneal_strategy
        if anneal_strategy not in ["cos", "linear"]:
            raise ValueError(
                "anneal_strategy must by one of 'cos' or 'linear', instead got {}".format(
                    anneal_strategy
                )
            )
        elif anneal_strategy == "cos":
            self.anneal_func = self._annealing_cos
        elif anneal_strategy == "linear":
            self.anneal_func = self._annealing_linear

        super(OneCycleDecay, self).__init__(max_lr, last_epoch, verbose)

    def _annealing_cos(self, start, end, pct):
        "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0."
        cos_out = math.cos(math.pi * pct) + 1
        return end + (start - end) / 2.0 * cos_out

    def _annealing_linear(self, start, end, pct):
        "Linearly anneal from `start` to `end` as pct goes from 0.0 to 1.0."
        return (end - start) * pct + start

    def get_lr(self):
        computed_lr = 0.0
        step_num = self.last_epoch

        if step_num > self.total_steps:
            raise ValueError(
                "Tried to step {} times. The specified number of total steps is {}".format(
                    step_num + 1, self.total_steps
                )
            )
        # Walk the phases until the one containing the current step; the
        # last phase also catches any boundary rounding leftovers.
        start_step = 0
        for i, phase in enumerate(self._schedule_phases):
            end_step = phase["end_step"]
            if step_num <= end_step or i == len(self._schedule_phases) - 1:
                # Fraction of the current phase already completed.
                pct = (step_num - start_step) / (end_step - start_step)
                computed_lr = self.anneal_func(phase["start_lr"], phase["end_lr"], pct)
                break
            start_step = phase["end_step"]

        return computed_lr
+
+
class TwoStepCosineDecay(LRScheduler):
    """Cosine annealing with two periods.

    For the first ``T_max1`` steps the rate follows a cosine schedule with
    period ``T_max1``; afterwards the (typically longer) period ``T_max2``
    takes over. ``get_lr`` uses the recurrence form of cosine annealing,
    ``_get_closed_form_lr`` the closed form.
    """

    def __init__(
        self, learning_rate, T_max1, T_max2, eta_min=0, last_epoch=-1, verbose=False
    ):
        if not isinstance(T_max1, int):
            raise TypeError(
                "The type of 'T_max1' in 'CosineAnnealingDecay' must be 'int', but received %s."
                % type(T_max1)
            )
        if not isinstance(T_max2, int):
            raise TypeError(
                "The type of 'T_max2' in 'CosineAnnealingDecay' must be 'int', but received %s."
                % type(T_max2)
            )
        if not isinstance(eta_min, (float, int)):
            raise TypeError(
                "The type of 'eta_min' in 'CosineAnnealingDecay' must be 'float, int', but received %s."
                % type(eta_min)
            )
        assert T_max1 > 0 and isinstance(
            T_max1, int
        ), " 'T_max1' must be a positive integer."
        # NOTE(review): the message below says T_max1 but the check is on
        # T_max2 (copy-paste in the original message text).
        assert T_max2 > 0 and isinstance(
            T_max2, int
        ), " 'T_max1' must be a positive integer."
        self.T_max1 = T_max1
        self.T_max2 = T_max2
        self.eta_min = float(eta_min)
        super(TwoStepCosineDecay, self).__init__(learning_rate, last_epoch, verbose)

    def get_lr(self):
        # Phase 1: cosine annealing with period T_max1.
        if self.last_epoch <= self.T_max1:
            if self.last_epoch == 0:
                return self.base_lr
            elif (self.last_epoch - 1 - self.T_max1) % (2 * self.T_max1) == 0:
                # Restart point of the cosine cycle: step the rate back up.
                return (
                    self.last_lr
                    + (self.base_lr - self.eta_min)
                    * (1 - math.cos(math.pi / self.T_max1))
                    / 2
                )

            # Recurrence form: scale last_lr by the ratio of consecutive
            # cosine terms (equivalent to the closed form below).
            return (1 + math.cos(math.pi * self.last_epoch / self.T_max1)) / (
                1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max1)
            ) * (self.last_lr - self.eta_min) + self.eta_min
        else:
            # Phase 2: same recurrence, with period T_max2.
            if (self.last_epoch - 1 - self.T_max2) % (2 * self.T_max2) == 0:
                return (
                    self.last_lr
                    + (self.base_lr - self.eta_min)
                    * (1 - math.cos(math.pi / self.T_max2))
                    / 2
                )

            return (1 + math.cos(math.pi * self.last_epoch / self.T_max2)) / (
                1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max2)
            ) * (self.last_lr - self.eta_min) + self.eta_min

    def _get_closed_form_lr(self):
        # Closed-form cosine annealing, selecting the period by phase.
        if self.last_epoch <= self.T_max1:
            return (
                self.eta_min
                + (self.base_lr - self.eta_min)
                * (1 + math.cos(math.pi * self.last_epoch / self.T_max1))
                / 2
            )
        else:
            return (
                self.eta_min
                + (self.base_lr - self.eta_min)
                * (1 + math.cos(math.pi * self.last_epoch / self.T_max2))
                / 2
            )
diff --git a/docling_ibm_models/slanet_1m/optimizer/optimizer.py b/docling_ibm_models/slanet_1m/optimizer/optimizer.py
new file mode 100644
index 0000000..d7f78a5
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/optimizer/optimizer.py
@@ -0,0 +1,292 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle import optimizer as optim
+
+
class Momentum(object):
    """
    Momentum optimizer wrapper (SGD with velocity state).
    Args:
        learning_rate (float|Variable): learning rate used to update
            parameters; a float or a one-element float Variable.
        momentum (float): momentum factor.
        weight_decay (optional): regularization strategy. Default: None.
        grad_clip (optional): gradient clipping strategy. Default: None.
    """

    def __init__(
        self, learning_rate, momentum, weight_decay=None, grad_clip=None, **args
    ):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, model):
        # Only parameters marked trainable take part in the update.
        trainable = [p for p in model.parameters() if p.trainable is True]
        return optim.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=trainable,
        )
+
+
class Adam(object):
    """
    Adam optimizer wrapper.

    Supports an optional grouped learning rate (``group_lr=True`` together
    with ``training_step="LF_2"``): the MLM branch and the shared prediction
    layers keep the base learning rate while every other parameter uses one
    tenth of it. Any other ``training_step`` value falls back to a single
    parameter group.
    """

    def __init__(
        self,
        learning_rate=0.001,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-08,
        parameter_list=None,
        weight_decay=None,
        grad_clip=None,
        name=None,
        lazy_mode=False,
        **kwargs,
    ):
        # Fixed: learning_rate was assigned twice in the original code.
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name
        self.lazy_mode = lazy_mode
        self.group_lr = kwargs.get("group_lr", False)
        self.training_step = kwargs.get("training_step", None)

    def __call__(self, model):
        if self.group_lr:
            if self.training_step == "LF_2":
                import paddle

                # DataParallel wraps the real model in `_layers`.
                if isinstance(model, paddle.DataParallel):  # multi gpu
                    head = model._layers.head
                else:  # single gpu
                    head = model.head
                mlm = head.MLM_VRM.MLM.parameters()
                pre_mlm_pp = head.MLM_VRM.Prediction.pp_share.parameters()
                pre_mlm_w = head.MLM_VRM.Prediction.w_share.parameters()

                # ids of the parameters that keep the base learning rate
                base_ids = set()
                for group in (mlm, pre_mlm_pp, pre_mlm_w):
                    for param in group:
                        base_ids.add(id(param))

                group_base_params = [
                    param for param in model.parameters() if id(param) in base_ids
                ]
                group_small_params = [
                    param for param in model.parameters() if id(param) not in base_ids
                ]
                train_params = [
                    {"params": group_base_params},
                    {
                        "params": group_small_params,
                        # learning_rate is a PiecewiseDecay-style schedule
                        # here; scale its first value by 0.1.
                        "learning_rate": self.learning_rate.values[0] * 0.1,
                    },
                ]

            else:
                print("group lr currently only support VisionLAN in LF_2 training step")
                train_params = [
                    param for param in model.parameters() if param.trainable is True
                ]
        else:
            train_params = [
                param for param in model.parameters() if param.trainable is True
            ]

        opt = optim.Adam(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            parameters=train_params,
        )
        return opt
+
+
class RMSProp(object):
    """
    Root Mean Squared Propagation (RMSProp) optimizer wrapper — an
    adaptive learning rate method.
    Args:
        learning_rate (float|Variable): learning rate used to update
            parameters; a float or a one-element float Variable.
        momentum (float): momentum factor. Default: 0.0.
        rho (float): rho value in the update equation. Default: 0.95.
        epsilon (float): avoids division by zero. Default: 1e-6.
        weight_decay (optional): regularization strategy. Default: None.
        grad_clip (optional): gradient clipping strategy. Default: None.
    """

    def __init__(
        self,
        learning_rate,
        momentum=0.0,
        rho=0.95,
        epsilon=1e-6,
        weight_decay=None,
        grad_clip=None,
        **args,
    ):
        super(RMSProp, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.rho = rho
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, model):
        # Only parameters marked trainable take part in the update.
        trainable = [p for p in model.parameters() if p.trainable is True]
        return optim.RMSProp(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            rho=self.rho,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=trainable,
        )
+
+
class Adadelta(object):
    """
    Adadelta optimizer wrapper.
    Args:
        learning_rate (float): learning rate. Default: 0.001.
        epsilon (float): numerical stability term. Default: 1e-08.
        rho (float): decay rate of the squared-gradient average. Default: 0.95.
        parameter_list (optional): kept for interface compatibility; unused.
        weight_decay (optional): regularization strategy. Default: None.
        grad_clip (optional): gradient clipping strategy. Default: None.
        name (str, optional): optimizer name prefix. Default: None.
    """

    def __init__(
        self,
        learning_rate=0.001,
        epsilon=1e-08,
        rho=0.95,
        parameter_list=None,
        weight_decay=None,
        grad_clip=None,
        name=None,
        **kwargs,
    ):
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.rho = rho
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name

    def __call__(self, model):
        # Only parameters marked trainable take part in the update.
        trainable = [p for p in model.parameters() if p.trainable is True]
        return optim.Adadelta(
            learning_rate=self.learning_rate,
            epsilon=self.epsilon,
            rho=self.rho,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            parameters=trainable,
        )
+
+
class AdamW(object):
    """
    AdamW (decoupled weight decay) optimizer wrapper with optional
    per-parameter weight-decay exclusion.
    Args:
        learning_rate (float): learning rate. Default: 0.001.
        beta1, beta2, epsilon (float): Adam moment/stability coefficients.
        weight_decay (float): decoupled weight decay coefficient; None is
            treated as the default 0.01.
        multi_precision (bool): use multi-precision updates. Default: False.
        grad_clip (optional): gradient clipping strategy. Default: None.
        no_weight_decay_name (str, optional): whitespace-separated name
            fragments; matching parameters skip weight decay.
        one_dim_param_no_weight_decay (bool): if True, rank-1 parameters
            (biases, norm scales) also skip weight decay.
        name (str, optional): optimizer name prefix.
        lazy_mode (bool): paddle lazy mode flag. Default: False.
    """

    def __init__(
        self,
        learning_rate=0.001,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-8,
        weight_decay=0.01,
        multi_precision=False,
        grad_clip=None,
        no_weight_decay_name=None,
        one_dim_param_no_weight_decay=False,
        name=None,
        lazy_mode=False,
        **args,
    ):
        super().__init__()
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.grad_clip = grad_clip
        # An explicit None falls back to the standard 0.01 default.
        self.weight_decay = 0.01 if weight_decay is None else weight_decay
        self.name = name
        self.lazy_mode = lazy_mode
        self.multi_precision = multi_precision
        if no_weight_decay_name:
            self.no_weight_decay_name_list = no_weight_decay_name.split()
        else:
            self.no_weight_decay_name_list = []
        self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay

    def __call__(self, model):
        trainable = [p for p in model.parameters() if p.trainable is True]

        # Parameters whose name contains an excluded fragment skip decay.
        skip_decay = []
        for param_name, param in model.named_parameters():
            if any(frag in param_name for frag in self.no_weight_decay_name_list):
                skip_decay.append(param.name)
        if self.one_dim_param_no_weight_decay:
            # Rank-1 tensors (biases / norm scales) also skip decay.
            for _, param in model.named_parameters():
                if len(param.shape) == 1:
                    skip_decay.append(param.name)
        self.no_weight_decay_param_name_list = skip_decay

        return optim.AdamW(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            parameters=trainable,
            weight_decay=self.weight_decay,
            multi_precision=self.multi_precision,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            apply_decay_param_fun=self._apply_decay_param_fun,
        )

    def _apply_decay_param_fun(self, name):
        # Called by paddle per parameter; True means apply weight decay.
        return name not in self.no_weight_decay_param_name_list
diff --git a/docling_ibm_models/slanet_1m/optimizer/regularizer.py b/docling_ibm_models/slanet_1m/optimizer/regularizer.py
new file mode 100644
index 0000000..740ad1c
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/optimizer/regularizer.py
@@ -0,0 +1,51 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import paddle
+
+
class L1Decay(object):
    """
    L1 weight-decay regularization; encourages sparse weights.
    Args:
        factor (float): regularization coefficient. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L1Decay, self).__init__()
        self.coeff = factor

    def __call__(self):
        return paddle.regularizer.L1Decay(self.coeff)
+
+
class L2Decay(object):
    """
    L2 weight-decay regularization; helps prevent over-fitting.
    Args:
        factor (float): regularization coefficient. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L2Decay, self).__init__()
        # Paddle optimizers accept a plain float as an L2 coefficient.
        self.coeff = float(factor)

    def __call__(self):
        return self.coeff
diff --git a/docling_ibm_models/slanet_1m/predict_table.py b/docling_ibm_models/slanet_1m/predict_table.py
new file mode 100644
index 0000000..814cad4
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/predict_table.py
@@ -0,0 +1,254 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "..")))
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "../..")))
+
+os.environ["FLAGS_allocator_strategy"] = "auto_growth"
+import cv2
+import copy
+import logging
+import numpy as np
+import time
+import paddleocr.tools.infer.predict_rec as predict_rec
+import paddleocr.tools.infer.predict_det as predict_det
+import paddleocr.tools.infer.utility as utility
+from paddleocr.tools.infer.predict_system import sorted_boxes
+from paddleocr.ppocr.utils.utility import get_image_file_list, check_and_read
+from paddleocr.ppocr.utils.logging import get_logger
+from paddleocr.ppstructure.table.matcher import TableMatch
+from paddleocr.ppstructure.table.table_master_match import TableMasterMatcher
+from paddleocr.ppstructure.utility import parse_args
+import paddleocr.ppstructure.table.predict_structure as predict_strture
+
+logger = get_logger()
+
+
def expand(pix, det_box, shape):
    """Grow ``det_box`` by ``pix`` pixels on every side, clamped to the image.

    Args:
        pix: number of pixels to pad on each side.
        det_box: box coordinates as (x0, y0, x1, y1).
        shape: image shape as (h, w, c).

    Returns:
        Tuple (x0, y0, x1, y1) of the padded box, clamped to [0, w] x [0, h].
    """
    x0, y0, x1, y1 = det_box
    h, w, _ = shape
    # Pad outward, then clamp each coordinate to the image bounds.
    x0_ = max(x0 - pix, 0)
    y0_ = max(y0 - pix, 0)
    x1_ = min(x1 + pix, w)
    y1_ = min(y1 + pix, h)
    return x0_, y0_, x1_, y1_
+
+
class TableSystem(object):
    """End-to-end table recognition pipeline: text detection + text
    recognition + table structure prediction, merged into HTML by a matcher."""

    def __init__(self, args, text_detector=None, text_recognizer=None):
        """Build (or reuse) the text detector/recognizer and structure model.

        Args:
            args: parsed inference args (paddleocr ppstructure utility args).
            text_detector: optional pre-built detector to reuse.
            text_recognizer: optional pre-built recognizer to reuse.
        """
        self.args = args
        if not args.show_log:
            logger.setLevel(logging.INFO)
        # Temporarily disable benchmarking while building det/rec so that
        # only the table structurer is benchmarked.
        benchmark_tmp = False
        if args.benchmark:
            benchmark_tmp = args.benchmark
            args.benchmark = False
        self.text_detector = (
            predict_det.TextDetector(copy.deepcopy(args))
            if text_detector is None
            else text_detector
        )
        self.text_recognizer = (
            predict_rec.TextRecognizer(copy.deepcopy(args))
            if text_recognizer is None
            else text_recognizer
        )
        if benchmark_tmp:
            args.benchmark = True
        self.table_structurer = predict_strture.TableStructurer(args)
        if args.table_algorithm in ["TableMaster"]:
            self.match = TableMasterMatcher()
        else:
            self.match = TableMatch(filter_ocr_result=True)

        # NOTE(review): this extra predictor appears unused by the methods
        # below (TableStructurer builds its own); confirm before removing.
        (
            self.predictor,
            self.input_tensor,
            self.output_tensors,
            self.config,
        ) = utility.create_predictor(args, "table", logger)

    def __call__(self, img, return_ocr_result_in_table=False):
        """Recognize one table image.

        Returns:
            (result, time_dict) where result holds 'html' and 'cell_bbox'
            (plus OCR 'boxes'/'rec_res' when return_ocr_result_in_table is
            True), and time_dict holds per-stage latencies in seconds.
        """
        result = dict()
        time_dict = {"det": 0, "rec": 0, "table": 0, "all": 0, "match": 0}
        start = time.time()
        structure_res, elapse = self._structure(copy.deepcopy(img))
        result["cell_bbox"] = structure_res[1].tolist()
        time_dict["table"] = elapse

        dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr(copy.deepcopy(img))
        time_dict["det"] = det_elapse
        time_dict["rec"] = rec_elapse

        if return_ocr_result_in_table:
            result["boxes"] = [x.tolist() for x in dt_boxes]
            result["rec_res"] = rec_res

        tic = time.time()
        # Match OCR text into the predicted table structure to produce HTML.
        pred_html = self.match(structure_res, dt_boxes, rec_res)
        toc = time.time()
        time_dict["match"] = toc - tic
        result["html"] = pred_html
        end = time.time()
        time_dict["all"] = end - start
        return result, time_dict

    def _structure(self, img):
        """Run the table-structure model; returns (structure_res, elapse)."""
        structure_res, elapse = self.table_structurer(copy.deepcopy(img))
        return structure_res, elapse

    def _ocr(self, img):
        """Detect and recognize text lines in the table image.

        Returns:
            (dt_boxes, rec_res, det_elapse, rec_elapse) with dt_boxes as an
            (N, 4) array of axis-aligned [x_min, y_min, x_max, y_max] boxes.
        """
        h, w = img.shape[:2]
        dt_boxes, det_elapse = self.text_detector(copy.deepcopy(img))
        dt_boxes = sorted_boxes(dt_boxes)

        # Collapse each detected quadrilateral into an axis-aligned box,
        # padded by 1px and clamped to the image bounds.
        r_boxes = []
        for box in dt_boxes:
            x_min = max(0, box[:, 0].min() - 1)
            x_max = min(w, box[:, 0].max() + 1)
            y_min = max(0, box[:, 1].min() - 1)
            y_max = min(h, box[:, 1].max() + 1)
            box = [x_min, y_min, x_max, y_max]
            r_boxes.append(box)
        dt_boxes = np.array(r_boxes)
        logger.debug("dt_boxes num : {}, elapse : {}".format(len(dt_boxes), det_elapse))
        # NOTE(review): np.array(...) is never None, so this guard looks
        # unreachable; it also returns 2 values where callers unpack 4.
        if dt_boxes is None:
            return None, None

        img_crop_list = []
        for i in range(len(dt_boxes)):
            det_box = dt_boxes[i]
            # Pad each crop by 2px of context for the recognizer.
            x0, y0, x1, y1 = expand(2, det_box, img.shape)
            text_rect = img[int(y0) : int(y1), int(x0) : int(x1), :]
            img_crop_list.append(text_rect)
        rec_res, rec_elapse = self.text_recognizer(img_crop_list)
        logger.debug("rec_res num : {}, elapse : {}".format(len(rec_res), rec_elapse))
        return dt_boxes, rec_res, det_elapse, rec_elapse
+
+
def to_excel(html_table, excel_path):
    """Convert an HTML table string into an .xlsx file at ``excel_path``."""
    # Imported lazily so tablepyxl is only required when exporting to Excel.
    from tablepyxl import tablepyxl

    tablepyxl.document_to_xl(html_table, excel_path)
+
+
def main(args):
    """Run table recognition over every image in ``args.image_dir`` and dump
    the results (xlsx per image, annotated image, aggregate HTML report) into
    ``args.output``."""
    image_file_list = get_image_file_list(args.image_dir)
    # Shard the file list across processes when multi-process mode is used.
    image_file_list = image_file_list[args.process_id :: args.total_process_num]
    os.makedirs(args.output, exist_ok=True)

    table_sys = TableSystem(args)
    img_num = len(image_file_list)

    # NOTE(review): the HTML fragments written below look truncated (markup
    # appears stripped); verify against the original upstream script.
    f_html = open(os.path.join(args.output, "show.html"), mode="w", encoding="utf-8")
    f_html.write("\n\n")
    f_html.write('\n')
    f_html.write(
        ''
    )
    f_html.write("\n")
    f_html.write("img name\n")
    f_html.write(" | ori image | ")
    f_html.write("table html | ")
    f_html.write("cell box | ")
    f_html.write(" \n")

    for i, image_file in enumerate(image_file_list):
        logger.info("[{}/{}] {}".format(i, img_num, image_file))
        # check_and_read handles gif/pdf inputs; flag=False means plain image.
        img, flag, _ = check_and_read(image_file)
        excel_path = os.path.join(
            args.output, os.path.basename(image_file).split(".")[0] + ".xlsx"
        )
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        pred_res, _ = table_sys(img)
        pred_html = pred_res["html"]
        logger.info(pred_html)
        to_excel(pred_html, excel_path)
        logger.info("excel saved to {}".format(excel_path))
        elapse = time.time() - starttime
        logger.info("Predict time : {:.3f}s".format(elapse))

        # Axis-aligned cell boxes (4 values) vs. polygon boxes (8 values).
        if len(pred_res["cell_bbox"]) > 0 and len(pred_res["cell_bbox"][0]) == 4:
            img = predict_strture.draw_rectangle(image_file, pred_res["cell_bbox"])
        else:
            img = utility.draw_boxes(img, pred_res["cell_bbox"])
        img_save_path = os.path.join(args.output, os.path.basename(image_file))
        cv2.imwrite(img_save_path, img)

        f_html.write("\n")
        f_html.write(f" {os.path.basename(image_file)} \n")
        f_html.write(f' |  | \n')
        f_html.write(
            ''
            + pred_html.replace("", ""
            )
            + " | \n"
        )
        f_html.write(f'}) | \n')
        f_html.write(" \n")
    f_html.write(" \n")
    f_html.close()

    if args.benchmark:
        # Report inference benchmark stats collected by the structurer.
        table_sys.table_structurer.autolog.report()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ args.image_dir = r"12_tables"
+
+ #args.det_model_dir = "...\en_ppocr_mobile_v2.0_table_det_infer"
+ args.det_model_dir = "inference_table/en_PP-OCRv3_det_infer"
+ #args.rec_model_dir = "...\en_ppocr_mobile_v2.0_table_rec_infer"
+ args.rec_model_dir = "inference_table/en_PP-OCRv3_rec_infer"
+ args.table_model_dir = "model_final"
+ #args.table_model_dir = "...\Desktop\model_final"
+ args.rec_char_dict_path = "dict_table/en_dict.txt"
+ args.table_char_dict_path = "dict_table/table_structure_dict.txt"
+ args.font_path = r'\Fonts\Arial.ttf'
+
+ args.output = "output"
+ if args.use_mp:
+ import subprocess
+
+ p_list = []
+ total_process_num = args.total_process_num
+ for process_id in range(total_process_num):
+ cmd = (
+ [sys.executable, "-u"]
+ + sys.argv
+ + ["--process_id={}".format(process_id), "--use_mp={}".format(False)]
+ )
+ p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
+ p_list.append(p)
+ for p in p_list:
+ p.wait()
+ else:
+ main(args)
diff --git a/docling_ibm_models/slanet_1m/program.py b/docling_ibm_models/slanet_1m/program.py
new file mode 100644
index 0000000..daa6fa6
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/program.py
@@ -0,0 +1,826 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import platform
+import yaml
+import time
+import datetime
+import paddle
+import paddle.distributed as dist
+from tqdm import tqdm
+import cv2
+import numpy as np
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+
+from paddleocr.ppocr.utils.stats import TrainingStats
+from paddleocr.ppocr.utils.save_load import save_model
+from paddleocr.ppocr.utils.utility import print_dict, AverageMeter
+from paddleocr.ppocr.utils.logging import get_logger
+from paddleocr.ppocr.utils.loggers import WandbLogger, Loggers
+from paddleocr.ppocr.utils import profiler
+from paddleocr.ppocr.data import build_dataloader
+
+
class ArgsParser(ArgumentParser):
    """Command-line parser for the training/eval entry points.

    Supports ``-c/--config`` for the YAML config path, ``-o/--opt`` for
    ad-hoc ``key=value`` config overrides, and ``-p/--profiler_options``
    for paddle profiler settings.
    """

    def __init__(self):
        super(ArgsParser, self).__init__(formatter_class=RawDescriptionHelpFormatter)
        self.add_argument("-c", "--config", help="configuration file to use")
        self.add_argument("-o", "--opt", nargs="+", help="set configuration options")
        self.add_argument(
            "-p",
            "--profiler_options",
            type=str,
            default=None,
            help="The option of profiler, which should be in format "
            '"key1=value1;key2=value2;key3=value3".',
        )

    def parse_args(self, argv=None):
        """Parse argv, require --config, and normalize --opt into a dict."""
        args = super(ArgsParser, self).parse_args(argv)
        assert args.config is not None, "Please specify --config=configure_file_path."
        args.opt = self._parse_opt(args.opt)
        return args

    def _parse_opt(self, opts):
        """Turn a list of "key=value" strings into {key: yaml-parsed value}."""
        config = {}
        if not opts:
            return config
        for s in opts:
            s = s.strip()
            # Split on the first '=' only, so values containing '=' (paths,
            # expressions, base64) are preserved instead of crashing unpack.
            k, v = s.split("=", 1)
            config[k] = yaml.load(v, Loader=yaml.Loader)
        return config
+
+
def load_config(file_path):
    """
    Load config from yml/yaml file.

    Args:
        file_path (str): Path of the config file to be loaded.
    Returns: global config (dict)
    Raises:
        AssertionError: if the file extension is not .yml/.yaml.
    """
    _, ext = os.path.splitext(file_path)
    assert ext in [".yml", ".yaml"], "only support yaml files for now"
    # Use a context manager so the file handle is closed deterministically
    # (the original left an unclosed handle to be reclaimed by the GC).
    with open(file_path, "rb") as f:
        config = yaml.load(f, Loader=yaml.Loader)
    return config
+
+
def merge_config(config, opts):
    """Merge the override dict ``opts`` into ``config`` in place.

    Plain keys are assigned (dict values are merged into existing sections);
    dotted keys like "Global.epoch_num" are resolved by walking down the
    nested dicts and assigning the final sub-key.

    Args:
        config (dict): Config to be merged into.
        opts (dict): Overrides, possibly with dotted keys.
    Returns: the (mutated) global config.
    """
    for key, value in opts.items():
        if "." in key:
            # Dotted key: walk down to the parent of the final sub-key.
            sub_keys = key.split(".")
            assert sub_keys[0] in config, (
                "the sub_keys can only be one of global_config: {}, but get: "
                "{}, please check your running command".format(
                    config.keys(), sub_keys[0]
                )
            )
            cur = config[sub_keys[0]]
            last = len(sub_keys) - 2
            for idx, sub_key in enumerate(sub_keys[1:]):
                if idx == last:
                    cur[sub_key] = value
                else:
                    cur = cur[sub_key]
        elif isinstance(value, dict) and key in config:
            # Merge a dict value into an existing top-level section.
            config[key].update(value)
        else:
            # Plain top-level assignment / brand-new section.
            config[key] = value
    return config
+
+
def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False):
    """
    Log error and exit when set use_gpu=true in paddlepaddle
    cpu version.
    """
    err = (
        "Config {} cannot be set as true while your paddle "
        "is not compiled with {} ! \nPlease try: \n"
        "\t1. Install paddlepaddle to run model on {} \n"
        "\t2. Set {} as false in config file to run "
        "model on CPU"
    )

    try:
        if use_gpu and use_xpu:
            print("use_xpu and use_gpu can not both be true.")
        if use_gpu and not paddle.is_compiled_with_cuda():
            print(err.format("use_gpu", "cuda", "gpu", "use_gpu"))
            sys.exit(1)
        if use_xpu and not paddle.device.is_compiled_with_xpu():
            print(err.format("use_xpu", "xpu", "xpu", "use_xpu"))
            sys.exit(1)
        if use_npu:
            # Paddle <= 2.4 exposes is_compiled_with_npu(); newer versions
            # expose NPU support as a custom device instead.
            if (
                int(paddle.version.major) != 0
                and int(paddle.version.major) <= 2
                and int(paddle.version.minor) <= 4
            ):
                if not paddle.device.is_compiled_with_npu():
                    print(err.format("use_npu", "npu", "npu", "use_npu"))
                    sys.exit(1)
            # is_compiled_with_npu() has been updated after paddle-2.4
            else:
                if not paddle.device.is_compiled_with_custom_device("npu"):
                    print(err.format("use_npu", "npu", "npu", "use_npu"))
                    sys.exit(1)
        if use_mlu and not paddle.device.is_compiled_with_mlu():
            print(err.format("use_mlu", "mlu", "mlu", "use_mlu"))
            sys.exit(1)
    except Exception as e:
        # NOTE(review): sys.exit raises SystemExit (a BaseException), so the
        # exits above still propagate; this only swallows probing errors on
        # paddle builds that lack some is_compiled_with_* helpers.
        pass
+
+
def to_float32(preds):
    """Recursively cast every paddle.Tensor inside ``preds`` to float32.

    Accepts a (possibly nested) dict/list structure or a bare tensor;
    containers are mutated in place and the (same) structure is returned.
    """
    if isinstance(preds, dict):
        for key in preds:
            value = preds[key]
            if isinstance(value, (dict, list)):
                preds[key] = to_float32(value)
            elif isinstance(value, paddle.Tensor):
                preds[key] = value.astype(paddle.float32)
    elif isinstance(preds, list):
        for i, value in enumerate(preds):
            if isinstance(value, (dict, list)):
                preds[i] = to_float32(value)
            elif isinstance(value, paddle.Tensor):
                preds[i] = value.astype(paddle.float32)
    elif isinstance(preds, paddle.Tensor):
        # A bare tensor: rebind rather than mutate.
        preds = preds.astype(paddle.float32)
    return preds
+
+
def train(
    config,
    train_dataloader,
    valid_dataloader,
    device,
    model,
    loss_class,
    optimizer,
    lr_scheduler,
    post_process_class,
    eval_class,
    pre_best_model_dict,
    logger,
    step_pre_epoch,
    log_writer=None,
    scaler=None,
    amp_level="O2",
    amp_custom_black_list=[],
    amp_custom_white_list=[],
    amp_dtype="float16",
):
    """Main training loop: forward/backward over train_dataloader, periodic
    evaluation on valid_dataloader, and checkpointing of best/latest/epoch
    models.

    Args:
        config: merged YAML config dict (reads the Global/Architecture/Loss
            sections).
        scaler: paddle AMP GradScaler; AMP is used iff it is truthy.
        pre_best_model_dict: metrics/state restored from a checkpoint; may
            carry 'global_step' and 'start_epoch' for resuming.
        step_pre_epoch: steps per epoch, used when eval is scheduled by epoch.

    NOTE(review): the mutable list defaults (amp_custom_*_list) are shared
    across calls; harmless as used here since they are never mutated.
    """
    cal_metric_during_train = config["Global"].get("cal_metric_during_train", False)
    calc_epoch_interval = config["Global"].get("calc_epoch_interval", 1)
    log_smooth_window = config["Global"]["log_smooth_window"]
    epoch_num = config["Global"]["epoch_num"]
    print_batch_step = config["Global"]["print_batch_step"]
    eval_batch_step = config["Global"]["eval_batch_step"]
    eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
    profiler_options = config["profiler_options"]

    global_step = 0
    if "global_step" in pre_best_model_dict:
        global_step = pre_best_model_dict["global_step"]
    start_eval_step = 0
    if isinstance(eval_batch_step, list) and len(eval_batch_step) >= 2:
        # eval_batch_step is [start_step, interval]; eval_batch_epoch (if
        # set) overrides the interval with a per-epoch schedule.
        start_eval_step = eval_batch_step[0] if not eval_batch_epoch else 0
        eval_batch_step = (
            eval_batch_step[1]
            if not eval_batch_epoch
            else step_pre_epoch * eval_batch_epoch
        )
        if len(valid_dataloader) == 0:
            logger.info(
                "No Images in eval dataset, evaluation during training "
                "will be disabled"
            )
            # Effectively-infinite threshold disables in-training eval.
            start_eval_step = 1e111
        logger.info(
            "During the training process, after the {}th iteration, "
            "an evaluation is run every {} iterations".format(
                start_eval_step, eval_batch_step
            )
        )
    save_epoch_step = config["Global"]["save_epoch_step"]
    save_model_dir = config["Global"]["save_model_dir"]
    if not os.path.exists(save_model_dir):
        os.makedirs(save_model_dir)
    main_indicator = eval_class.main_indicator
    best_model_dict = {main_indicator: 0}
    best_model_dict.update(pre_best_model_dict)
    train_stats = TrainingStats(log_smooth_window, ["lr"])
    model_average = False
    model.train()

    use_srn = config["Architecture"]["algorithm"] == "SRN"
    # Algorithms whose forward pass needs extra inputs beyond the image.
    extra_input_models = [
        "SRN",
        "NRTR",
        "SAR",
        "SEED",
        "SVTR",
        "SVTR_LCNet",
        "SPIN",
        "VisionLAN",
        "RobustScanner",
        "RFL",
        "DRRG",
        "SATRN",
        "SVTR_HGNet",
        "ParseQ",
        "CPPD",
    ]
    extra_input = False
    if config["Architecture"]["algorithm"] == "Distillation":
        # Distillation wraps several sub-models; extra_input if any needs it.
        for key in config["Architecture"]["Models"]:
            extra_input = (
                extra_input
                or config["Architecture"]["Models"][key]["algorithm"]
                in extra_input_models
            )
    else:
        extra_input = config["Architecture"]["algorithm"] in extra_input_models
    try:
        model_type = config["Architecture"]["model_type"]
    except:
        model_type = None

    algorithm = config["Architecture"]["algorithm"]

    start_epoch = (
        best_model_dict["start_epoch"] if "start_epoch" in best_model_dict else 1
    )

    total_samples = 0
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    reader_start = time.time()
    eta_meter = AverageMeter()

    # Drop the last batch on Windows, where the dataloader can hang on it.
    max_iter = (
        len(train_dataloader) - 1
        if platform.system() == "Windows"
        else len(train_dataloader)
    )

    for epoch in range(start_epoch, epoch_num + 1):
        if train_dataloader.dataset.need_reset:
            # Rebuild the dataloader (e.g. for ratio-sampled datasets),
            # reseeded per epoch for a fresh shuffle.
            train_dataloader = build_dataloader(
                config, "Train", device, logger, seed=epoch
            )
            max_iter = (
                len(train_dataloader) - 1
                if platform.system() == "Windows"
                else len(train_dataloader)
            )

        for idx, batch in enumerate(train_dataloader):
            profiler.add_profiler_step(profiler_options)
            train_reader_cost += time.time() - reader_start
            if idx >= max_iter:
                break
            lr = optimizer.get_lr()
            images = batch[0]
            if use_srn:
                model_average = True
            # use amp
            if scaler:
                with paddle.amp.auto_cast(
                    level=amp_level,
                    custom_black_list=amp_custom_black_list,
                    custom_white_list=amp_custom_white_list,
                    dtype=amp_dtype,
                ):
                    if model_type == "table" or extra_input:
                        preds = model(images, data=batch[1:])
                    elif model_type in ["kie"]:
                        preds = model(batch)
                    elif algorithm in ["CAN"]:
                        preds = model(batch[:3])
                    else:
                        preds = model(images)
                # Loss is computed in float32 outside the autocast region.
                preds = to_float32(preds)
                loss = loss_class(preds, batch)
                avg_loss = loss["loss"]
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)
            else:
                if model_type == "table" or extra_input:
                    preds = model(images, data=batch[1:])
                elif model_type in ["kie", "sr"]:
                    preds = model(batch)
                elif algorithm in ["CAN"]:
                    preds = model(batch[:3])
                else:
                    preds = model(images)
                loss = loss_class(preds, batch)
                avg_loss = loss["loss"]
                avg_loss.backward()
                optimizer.step()

            optimizer.clear_grad()

            if (
                cal_metric_during_train and epoch % calc_epoch_interval == 0
            ):  # only rec and cls need
                batch = [item.numpy() for item in batch]
                if model_type in ["kie", "sr"]:
                    eval_class(preds, batch)
                elif model_type in ["table"]:
                    post_result = post_process_class(preds, batch)
                    eval_class(post_result, batch)
                elif algorithm in ["CAN"]:
                    model_type = "can"
                    eval_class(preds[0], batch[2:], epoch_reset=(idx == 0))
                else:
                    if config["Loss"]["name"] in [
                        "MultiLoss",
                        "MultiLoss_v2",
                    ]:  # for multi head loss
                        post_result = post_process_class(
                            preds["ctc"], batch[1]
                        )  # for CTC head out
                    elif config["Loss"]["name"] in ["VLLoss"]:
                        post_result = post_process_class(preds, batch[1], batch[-1])
                    else:
                        post_result = post_process_class(preds, batch[1])
                    eval_class(post_result, batch)
                metric = eval_class.get_metric()
                train_stats.update(metric)

            train_batch_time = time.time() - reader_start
            train_batch_cost += train_batch_time
            eta_meter.update(train_batch_time)
            global_step += 1
            total_samples += len(images)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

            # logger and visualdl
            stats = {
                k: float(v) if v.shape == [] else v.numpy().mean()
                for k, v in loss.items()
            }
            stats["lr"] = lr
            train_stats.update(stats)

            if log_writer is not None and dist.get_rank() == 0:
                log_writer.log_metrics(
                    metrics=train_stats.get(), prefix="TRAIN", step=global_step
                )

            if dist.get_rank() == 0 and (
                (global_step > 0 and global_step % print_batch_step == 0)
                or (idx >= len(train_dataloader) - 1)
            ):
                logs = train_stats.log()

                eta_sec = (
                    (epoch_num + 1 - epoch) * len(train_dataloader) - idx - 1
                ) * eta_meter.avg
                eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                max_mem_reserved_str = ""
                max_mem_allocated_str = ""
                if paddle.device.is_compiled_with_cuda():
                    max_mem_reserved_str = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
                    max_mem_allocated_str = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
                strs = (
                    "epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: "
                    "{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, "
                    "ips: {:.5f} samples/s, eta: {}, {} {}".format(
                        epoch,
                        epoch_num,
                        global_step,
                        logs,
                        train_reader_cost / print_batch_step,
                        train_batch_cost / print_batch_step,
                        total_samples / print_batch_step,
                        total_samples / train_batch_cost,
                        eta_sec_format,
                        max_mem_reserved_str,
                        max_mem_allocated_str,
                    )
                )
                logger.info(strs)

                # Reset the windowed timing counters after each log line.
                total_samples = 0
                train_reader_cost = 0.0
                train_batch_cost = 0.0
            # eval
            if (
                global_step > start_eval_step
                and (global_step - start_eval_step) % eval_batch_step == 0
                and dist.get_rank() == 0
            ):
                if model_average:
                    Model_Average = paddle.incubate.optimizer.ModelAverage(
                        0.15,
                        parameters=model.parameters(),
                        min_average_window=10000,
                        max_average_window=15625,
                    )
                    Model_Average.apply()
                cur_metric = eval(
                    model,
                    valid_dataloader,
                    post_process_class,
                    eval_class,
                    model_type,
                    extra_input=extra_input,
                    scaler=scaler,
                    amp_level=amp_level,
                    amp_custom_black_list=amp_custom_black_list,
                    amp_custom_white_list=amp_custom_white_list,
                    amp_dtype=amp_dtype,
                )
                cur_metric_str = "cur metric, {}".format(
                    ", ".join(["{}: {}".format(k, v) for k, v in cur_metric.items()])
                )
                logger.info(cur_metric_str)

                # logger metric
                if log_writer is not None:
                    log_writer.log_metrics(
                        metrics=cur_metric, prefix="EVAL", step=global_step
                    )

                # Checkpoint whenever the main indicator ties or improves.
                if cur_metric[main_indicator] >= best_model_dict[main_indicator]:
                    best_model_dict.update(cur_metric)
                    best_model_dict["best_epoch"] = epoch
                    save_model(
                        model,
                        optimizer,
                        save_model_dir,
                        logger,
                        config,
                        is_best=True,
                        prefix="best_accuracy",
                        best_model_dict=best_model_dict,
                        epoch=epoch,
                        global_step=global_step,
                    )
                best_str = "best metric, {}".format(
                    ", ".join(
                        ["{}: {}".format(k, v) for k, v in best_model_dict.items()]
                    )
                )
                logger.info(best_str)
                # logger best metric
                if log_writer is not None:
                    log_writer.log_metrics(
                        metrics={
                            "best_{}".format(main_indicator): best_model_dict[
                                main_indicator
                            ]
                        },
                        prefix="EVAL",
                        step=global_step,
                    )

                    log_writer.log_model(
                        is_best=True, prefix="best_accuracy", metadata=best_model_dict
                    )

            reader_start = time.time()
        if dist.get_rank() == 0:
            # Always keep a rolling "latest" checkpoint per epoch.
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix="latest",
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step,
            )

            if log_writer is not None:
                log_writer.log_model(is_best=False, prefix="latest")

        if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0:
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix="iter_epoch_{}".format(epoch),
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step,
            )
            if log_writer is not None:
                log_writer.log_model(
                    is_best=False, prefix="iter_epoch_{}".format(epoch)
                )

    best_str = "best metric, {}".format(
        ", ".join(["{}: {}".format(k, v) for k, v in best_model_dict.items()])
    )
    logger.info(best_str)
    if dist.get_rank() == 0 and log_writer is not None:
        log_writer.close()
    return
+
+
def eval(
    model,
    valid_dataloader,
    post_process_class,
    eval_class,
    model_type=None,
    extra_input=False,
    scaler=None,
    amp_level="O2",
    amp_custom_black_list=[],
    amp_custom_white_list=[],
    amp_dtype="float16",
):
    """Evaluate ``model`` over ``valid_dataloader`` and return the metric
    dict (with an extra 'fps' entry). Restores model.train() before
    returning.

    NOTE(review): shadows the builtin ``eval``; name kept for compatibility
    with the upstream PaddleOCR program module.
    """
    model.eval()
    with paddle.no_grad():
        total_frame = 0.0
        total_time = 0.0
        pbar = tqdm(
            total=len(valid_dataloader), desc="eval model:", position=0, leave=True
        )
        # Drop the last batch on Windows, where the dataloader can hang.
        max_iter = (
            len(valid_dataloader) - 1
            if platform.system() == "Windows"
            else len(valid_dataloader)
        )
        sum_images = 0
        for idx, batch in enumerate(valid_dataloader):
            if idx >= max_iter:
                break
            images = batch[0]
            start = time.time()

            # use amp
            if scaler:
                with paddle.amp.auto_cast(
                    level=amp_level,
                    custom_black_list=amp_custom_black_list,
                    dtype=amp_dtype,
                ):
                    if model_type == "table" or extra_input:
                        preds = model(images, data=batch[1:])
                    elif model_type in ["kie"]:
                        preds = model(batch)
                    elif model_type in ["can"]:
                        preds = model(batch[:3])
                    elif model_type in ["sr"]:
                        preds = model(batch)
                        # NOTE(review): sr_img/lr_img are extracted but unused
                        # here; upstream uses them for SR visual metrics.
                        sr_img = preds["sr_img"]
                        lr_img = preds["lr_img"]
                    else:
                        preds = model(images)
                preds = to_float32(preds)
            else:
                if model_type == "table" or extra_input:
                    preds = model(images, data=batch[1:])
                elif model_type in ["kie"]:
                    preds = model(batch)
                elif model_type in ["can"]:
                    preds = model(batch[:3])
                elif model_type in ["sr"]:
                    preds = model(batch)
                    sr_img = preds["sr_img"]
                    lr_img = preds["lr_img"]
                else:
                    preds = model(images)

            # Convert tensors to numpy for the metric/post-process classes.
            batch_numpy = []
            for item in batch:
                if isinstance(item, paddle.Tensor):
                    batch_numpy.append(item.numpy())
                else:
                    batch_numpy.append(item)
            # Obtain usable results from post-processing methods
            total_time += time.time() - start
            # Evaluate the results of the current batch
            if model_type in ["table", "kie"]:
                if post_process_class is None:
                    eval_class(preds, batch_numpy)
                else:
                    post_result = post_process_class(preds, batch_numpy)
                    eval_class(post_result, batch_numpy)
            elif model_type in ["sr"]:
                eval_class(preds, batch_numpy)
            elif model_type in ["can"]:
                eval_class(preds[0], batch_numpy[2:], epoch_reset=(idx == 0))
            else:
                post_result = post_process_class(preds, batch_numpy[1])
                eval_class(post_result, batch_numpy)

            pbar.update(1)
            total_frame += len(images)
            sum_images += 1
        # Get final metric,eg. acc or hmean
        metric = eval_class.get_metric()

    pbar.close()
    model.train()
    metric["fps"] = total_frame / total_time
    return metric
+
+
def update_center(char_center, post_result, preds):
    """Accumulate a running mean feature vector per decoded character index.

    ``char_center`` maps char index -> [mean_feature, count]; entries are
    updated only for samples whose decoded text matches the label exactly.
    Returns the (mutated) char_center dict.
    """
    result, label = post_result
    feats, logits = preds
    logits = paddle.argmax(logits, axis=-1)
    feats = feats.numpy()
    logits = logits.numpy()

    for idx_sample in range(len(label)):
        # Only accumulate samples the model predicted correctly.
        if result[idx_sample][0] != label[idx_sample][0]:
            continue
        feat = feats[idx_sample]
        logit = logits[idx_sample]
        for idx_time in range(len(logit)):
            index = logit[idx_time]
            if index in char_center:
                mean, count = char_center[index]
                # Incremental running-mean update, then bump the count.
                char_center[index][0] = (mean * count + feat[idx_time]) / (count + 1)
                char_center[index][1] = count + 1
            else:
                char_center[index] = [feat[idx_time], 1]
    return char_center
+
+
def get_center(model, eval_dataloader, post_process_class):
    """Run ``model`` over ``eval_dataloader`` and collect per-character mean
    feature vectors via ``update_center``.

    Returns:
        dict mapping char index -> mean feature vector.
    """
    progress = tqdm(total=len(eval_dataloader), desc="get center:")
    # Drop the last batch on Windows, where the dataloader can hang on it.
    batch_limit = len(eval_dataloader)
    if platform.system() == "Windows":
        batch_limit -= 1
    char_center = {}
    for step, batch in enumerate(eval_dataloader):
        if step >= batch_limit:
            break
        images = batch[0]
        start = time.time()
        preds = model(images)

        batch = [item.numpy() for item in batch]
        # Obtain usable results from post-processing methods
        post_result = post_process_class(preds, batch[1])

        # update char_center
        char_center = update_center(char_center, post_result, preds)
        progress.update(1)

    progress.close()
    # Keep only the mean feature vector, dropping the sample counts.
    for key in char_center.keys():
        char_center[key] = char_center[key][0]
    return char_center
+
+
def preprocess(is_train=False):
    """Parse CLI args, load and merge the YAML config, validate the
    algorithm/device combination, and set up logging.

    Args:
        is_train (bool): when True, dumps the merged config into the save
            dir and logs to <save_model_dir>/train.log.

    Returns:
        (config, device, logger, log_writer) tuple consumed by the
        train/eval entry points.
    """
    FLAGS = ArgsParser().parse_args()
    profiler_options = FLAGS.profiler_options
    config = load_config(FLAGS.config)
    config = merge_config(config, FLAGS.opt)
    profile_dic = {"profiler_options": FLAGS.profiler_options}
    config = merge_config(config, profile_dic)

    if is_train:
        # save_config
        save_model_dir = config["Global"]["save_model_dir"]
        os.makedirs(save_model_dir, exist_ok=True)
        with open(os.path.join(save_model_dir, "config.yml"), "w") as f:
            yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False)
        log_file = "{}/train.log".format(save_model_dir)
    else:
        log_file = None
    logger = get_logger(log_file=log_file)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config["Global"].get("use_gpu", False)
    use_xpu = config["Global"].get("use_xpu", False)
    use_npu = config["Global"].get("use_npu", False)
    use_mlu = config["Global"].get("use_mlu", False)

    # Only algorithms known to this training pipeline are accepted.
    alg = config["Architecture"]["algorithm"]
    assert alg in [
        "EAST",
        "DB",
        "SAST",
        "Rosetta",
        "CRNN",
        "STARNet",
        "RARE",
        "SRN",
        "CLS",
        "PGNet",
        "Distillation",
        "NRTR",
        "TableAttn",
        "SAR",
        "PSE",
        "SEED",
        "SDMGR",
        "LayoutXLM",
        "LayoutLM",
        "LayoutLMv2",
        "PREN",
        "FCE",
        "SVTR",
        "SVTR_LCNet",
        "ViTSTR",
        "ABINet",
        "DB++",
        "TableMaster",
        "SPIN",
        "VisionLAN",
        "Gestalt",
        "SLANet",
        "RobustScanner",
        "CT",
        "RFL",
        "DRRG",
        "CAN",
        "Telescope",
        "SATRN",
        "SVTR_HGNet",
        "ParseQ",
        "CPPD",
    ]

    # Pick the device string; GPU selection honors the distributed env.
    if use_xpu:
        device = "xpu:{0}".format(os.getenv("FLAGS_selected_xpus", 0))
    elif use_npu:
        device = "npu:{0}".format(os.getenv("FLAGS_selected_npus", 0))
    elif use_mlu:
        device = "mlu:{0}".format(os.getenv("FLAGS_selected_mlus", 0))
    else:
        device = "gpu:{}".format(dist.ParallelEnv().dev_id) if use_gpu else "cpu"
    check_device(use_gpu, use_xpu, use_npu, use_mlu)

    device = paddle.set_device(device)

    config["Global"]["distributed"] = dist.get_world_size() != 1

    loggers = []

    if "use_visualdl" in config["Global"] and config["Global"]["use_visualdl"]:
        logger.warning(
            "You are using VisualDL, the VisualDL is deprecated and "
            "removed in ppocr!"
        )
    log_writer = None
    if (
        "use_wandb" in config["Global"] and config["Global"]["use_wandb"]
    ) or "wandb" in config:
        save_dir = config["Global"]["save_model_dir"]
        wandb_writer_path = "{}/wandb".format(save_dir)
        if "wandb" in config:
            wandb_params = config["wandb"]
        else:
            wandb_params = dict()
        wandb_params.update({"save_dir": save_dir})
        log_writer = WandbLogger(**wandb_params, config=config)
        loggers.append(log_writer)
    else:
        log_writer = None
    print_dict(config, logger)

    # Wrap all active loggers in a single multiplexer, or disable logging.
    if loggers:
        log_writer = Loggers(loggers)
    else:
        log_writer = None

    logger.info("train with paddle {} and device {}".format(paddle.__version__, device))
    return config, device, logger, log_writer
diff --git a/docling_ibm_models/slanet_1m/requirements.txt b/docling_ibm_models/slanet_1m/requirements.txt
new file mode 100644
index 0000000..a11cb3f
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/requirements.txt
@@ -0,0 +1,21 @@
+paddleocr==2.8.0
+dvc[s3]==3.58.0
+openpyxl
+premailer
+shapely==2.0.6
+scikit-image==0.24.0
+imgaug==0.4.0
+pyclipper==1.3.0.post6
+lmdb==1.5.1
+tqdm==4.66.5
+numpy==1.26.4
+RapidFuzz==3.10.0
+Cython==3.0.11
+pillow==11.0.0
+PyYAML==6.0.1
+requests==2.32.3
+albumentations==1.4.10
+# to be compatible with albumentations
+albucore==0.0.13
+#TEDS
+apted==1.0.3
\ No newline at end of file
diff --git a/docling_ibm_models/slanet_1m/src/eval.py b/docling_ibm_models/slanet_1m/src/eval.py
new file mode 100644
index 0000000..4dcf353
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/src/eval.py
@@ -0,0 +1,174 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import json
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, __dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "..")))
+
+import paddle
+from paddleocr.ppocr.data import build_dataloader, set_signal_handlers
+from modeling.architectures import build_model
+from paddleocr.ppocr.postprocess import build_post_process
+from metrics import build_metric
+from paddleocr.ppocr.utils.save_load import load_model
+import program as program
+
+
def main():
    """Run evaluation: build the dataloader, model and metric from the
    module-level `config`, restore the checkpoint, evaluate, and dump the
    metrics to evaluation/metrics.json.

    Relies on module-level globals assigned in the __main__ guard:
    config, device, logger (vdl_writer is unused here).
    """
    global_config = config["Global"]
    # build dataloader
    set_signal_handlers()
    valid_dataloader = build_dataloader(config, "Eval", device, logger)

    # build post process
    post_process_class = build_post_process(config["PostProcess"], global_config)

    # build model
    # for rec algorithm: size the head(s) from the decoder's character set
    if hasattr(post_process_class, "character"):
        char_num = len(getattr(post_process_class, "character"))
        if config["Architecture"]["algorithm"] in [
            "Distillation",
        ]:  # distillation model
            for key in config["Architecture"]["Models"]:
                if (
                    config["Architecture"]["Models"][key]["Head"]["name"] == "MultiHead"
                ):  # for multi head
                    out_channels_list = {}
                    # SAR/NRTR decoders reserve extra special symbols, so the
                    # raw character count is adjusted before sizing each branch
                    if config["PostProcess"]["name"] == "DistillationSARLabelDecode":
                        char_num = char_num - 2
                    if config["PostProcess"]["name"] == "DistillationNRTRLabelDecode":
                        char_num = char_num - 3
                    out_channels_list["CTCLabelDecode"] = char_num
                    out_channels_list["SARLabelDecode"] = char_num + 2
                    out_channels_list["NRTRLabelDecode"] = char_num + 3
                    config["Architecture"]["Models"][key]["Head"][
                        "out_channels_list"
                    ] = out_channels_list
                else:
                    config["Architecture"]["Models"][key]["Head"][
                        "out_channels"
                    ] = char_num
        elif config["Architecture"]["Head"]["name"] == "MultiHead":  # for multi head
            out_channels_list = {}
            if config["PostProcess"]["name"] == "SARLabelDecode":
                char_num = char_num - 2
            if config["PostProcess"]["name"] == "NRTRLabelDecode":
                char_num = char_num - 3
            out_channels_list["CTCLabelDecode"] = char_num
            out_channels_list["SARLabelDecode"] = char_num + 2
            out_channels_list["NRTRLabelDecode"] = char_num + 3
            config["Architecture"]["Head"]["out_channels_list"] = out_channels_list
        else:  # base rec model
            config["Architecture"]["Head"]["out_channels"] = char_num

    model = build_model(config["Architecture"])
    # algorithms that feed extra (non-image) inputs to the model at eval time
    extra_input_models = [
        "SRN",
        "NRTR",
        "SAR",
        "SEED",
        "SVTR",
        "SVTR_LCNet",
        "VisionLAN",
        "RobustScanner",
        "SVTR_HGNet",
    ]
    extra_input = False
    if config["Architecture"]["algorithm"] == "Distillation":
        for key in config["Architecture"]["Models"]:
            extra_input = (
                extra_input
                or config["Architecture"]["Models"][key]["algorithm"]
                in extra_input_models
            )
    else:
        extra_input = config["Architecture"]["algorithm"] in extra_input_models
    if "model_type" in config["Architecture"].keys():
        if config["Architecture"]["algorithm"] == "CAN":
            model_type = "can"
        else:
            model_type = config["Architecture"]["model_type"]
    else:
        model_type = None

    # build metric
    eval_class = build_metric(config["Metric"])
    # amp
    use_amp = config["Global"].get("use_amp", False)
    amp_level = config["Global"].get("amp_level", "O2")
    amp_custom_black_list = config["Global"].get("amp_custom_black_list", [])
    if use_amp:
        AMP_RELATED_FLAGS_SETTING = {
            "FLAGS_cudnn_batchnorm_spatial_persistent": 1,
            "FLAGS_max_inplace_grad_add": 8,
        }
        paddle.set_flags(AMP_RELATED_FLAGS_SETTING)
        scale_loss = config["Global"].get("scale_loss", 1.0)
        use_dynamic_loss_scaling = config["Global"].get(
            "use_dynamic_loss_scaling", False
        )
        scaler = paddle.amp.GradScaler(
            init_loss_scaling=scale_loss,
            use_dynamic_loss_scaling=use_dynamic_loss_scaling,
        )
        if amp_level == "O2":
            model = paddle.amp.decorate(
                models=model, level=amp_level, master_weight=True
            )
    else:
        scaler = None

    # BUGFIX: use .get() so a config without "model_type" does not raise
    # KeyError here -- the model_type branch above already tolerates the
    # key being absent (model_type = None).
    best_model_dict = load_model(
        config, model, model_type=config["Architecture"].get("model_type")
    )
    if len(best_model_dict):
        logger.info("metric in ckpt ***************")
        for k, v in best_model_dict.items():
            logger.info("{}:{}".format(k, v))

    # start eval
    metric = program.eval(
        model,
        valid_dataloader,
        post_process_class,
        eval_class,
        model_type,
        extra_input,
        scaler,
        amp_level,
        amp_custom_black_list,
    )
    os.makedirs("evaluation", exist_ok=True)

    # Save metrics to evaluation/metrics.json
    with open("evaluation/metrics.json", "w") as f:
        json.dump(metric, f, indent=4)

    logger.info("metric eval ***************")
    for k, v in metric.items():
        logger.info("{}:{}".format(k, v))
+
+
if __name__ == "__main__":
    # program.preprocess() parses the CLI / config file and returns the merged
    # config plus runtime handles; main() reads these as module-level globals.
    config, device, logger, vdl_writer = program.preprocess()
    main()
diff --git a/docling_ibm_models/slanet_1m/src/eval_table.py b/docling_ibm_models/slanet_1m/src/eval_table.py
new file mode 100644
index 0000000..58ebaa4
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/src/eval_table.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "../..")))
+
+import cv2
+import pickle
+import paddle
+from tqdm import tqdm
+from paddleocr.ppstructure.table.table_metric import TEDS
+from paddleocr.ppstructure.table.predict_table import TableSystem
+from paddleocr.ppstructure.utility import init_args
+from paddleocr.ppocr.utils.logging import get_logger
+
+logger = get_logger()
+
+
def parse_args():
    """Build the table-evaluation CLI: PaddleOCR's standard options plus
    a --gt_path pointing at the tab-separated ground-truth label file."""
    arg_parser = init_args()
    arg_parser.add_argument("--gt_path", type=str)
    return arg_parser.parse_args()
+
+
def load_txt(txt_path):
    """Load ground-truth HTML labels from a tab-separated file.

    Each line is expected to be "<img_name><TAB><html>". Blank lines and
    lines without a tab separator are skipped instead of raising ValueError,
    and only the FIRST tab is treated as the separator so HTML payloads that
    themselves contain tabs survive intact.

    Args:
        txt_path: path to the label file.

    Returns:
        dict mapping image name -> HTML string (empty if the file is missing).
    """
    pred_html_dict = {}
    if not os.path.exists(txt_path):
        return pred_html_dict
    with open(txt_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing lines
            parts = line.split("\t", 1)
            if len(parts) != 2:
                continue  # malformed line: no tab separator
            img_name, pred_html = parts
            pred_html_dict[img_name] = pred_html
    return pred_html_dict
+
+
def load_result(path):
    """Load a cached pickle result from *path*.

    Returns:
        the unpickled object, or an empty dict when the file does not exist.

    NOTE(review): pickle.load is unsafe on untrusted input; these caches are
    written locally by save_result, so that is acceptable here.
    """
    data = {}
    if os.path.exists(path):
        # BUGFIX: the original used pickle.load(open(path, "rb")) and never
        # closed the handle; use a context manager to close it deterministically.
        with open(path, "rb") as f:
            data = pickle.load(f)
    return data
+
+
def save_result(path, data):
    """Merge *data* into the pickle cache at *path* and write it back.

    Existing entries are preserved; keys present in *data* overwrite them.
    """
    # read the current cache (inlined from load_result): empty when absent
    if os.path.exists(path):
        with open(path, "rb") as cache_file:
            merged = pickle.load(cache_file)
    else:
        merged = {}
    merged.update(data)
    with open(path, "wb") as cache_file:
        pickle.dump(merged, cache_file)
+
+
def main(gt_path, img_root, args):
    """End-to-end table evaluation with TEDS.

    For every image listed in the ground-truth file, runs OCR and table
    structure prediction (both cached as pickles under args.output so
    interrupted runs resume), matches them into an HTML table, and logs the
    average structure-only and full TEDS scores against the ground truth.

    Args:
        gt_path: tab-separated file of "<img_name><TAB><gt_html>" lines.
        img_root: directory containing the images named in gt_path.
        args: parsed CLI namespace (see parse_args).
    """
    os.makedirs(args.output, exist_ok=True)
    # init TableSystem
    text_sys = TableSystem(args)
    # load gt and preds html result
    gt_html_dict = load_txt(gt_path)
    if not gt_html_dict:
        # BUGFIX: the score averaging below divides by len(scores); bail out
        # early instead of raising ZeroDivisionError on an empty label file.
        logger.error("no ground truth entries loaded from {}".format(gt_path))
        return

    ocr_result = load_result(os.path.join(args.output, "ocr.pickle"))
    structure_result = load_result(os.path.join(args.output, "structure.pickle"))

    pred_htmls = []
    gt_htmls = []
    for img_name, gt_html in tqdm(gt_html_dict.items()):
        img = cv2.imread(os.path.join(img_root, img_name))
        # run ocr and save result (cache hit skips the model call)
        if img_name not in ocr_result:
            dt_boxes, rec_res, _, _ = text_sys._ocr(img)
            ocr_result[img_name] = [dt_boxes, rec_res]
            save_result(os.path.join(args.output, "ocr.pickle"), ocr_result)
        # run structure and save result
        if img_name not in structure_result:
            structure_res, _ = text_sys._structure(img)
            structure_result[img_name] = structure_res
            save_result(os.path.join(args.output, "structure.pickle"), structure_result)
        dt_boxes, rec_res = ocr_result[img_name]
        structure_res = structure_result[img_name]
        # match ocr and structure
        pred_html = text_sys.match(structure_res, dt_boxes, rec_res)

        pred_htmls.append(pred_html)
        gt_htmls.append(gt_html)

    # compute teds: structure-only (s-teds) and full-content variants
    teds = TEDS(n_jobs=16, structure_only=True)
    teds2 = TEDS(n_jobs=16)
    scores = teds.batch_evaluate_html(gt_htmls, pred_htmls)
    scores2 = teds2.batch_evaluate_html(gt_htmls, pred_htmls)
    logger.info("s-teds: {}".format(sum(scores) / len(scores)))
    logger.info("teds: {}".format(sum(scores2) / len(scores2)))
+
+
if __name__ == "__main__":
    # --image_dir comes from PaddleOCR's init_args; --gt_path is added locally.
    args = parse_args()
    main(args.gt_path, args.image_dir, args)
diff --git a/docling_ibm_models/slanet_1m/src/train.py b/docling_ibm_models/slanet_1m/src/train.py
new file mode 100644
index 0000000..c2abe41
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/src/train.py
@@ -0,0 +1,256 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "..")))
+
+import yaml
+import paddle
+import paddle.distributed as dist
+
+from paddleocr.ppocr.data import build_dataloader, set_signal_handlers
+from modeling.architectures import build_model
+from losses import build_loss
+from optimizer import build_optimizer
+from paddleocr.ppocr.postprocess import build_post_process
+from metrics import build_metric
+from paddleocr.ppocr.utils.save_load import load_model
+from paddleocr.ppocr.utils.utility import set_seed
+from modeling.architectures import apply_to_static
+import program as program
+
+dist.get_world_size()
+
+
def main(config, device, logger, vdl_writer, seed):
    """Training entry point.

    Builds the train/eval dataloaders, model, loss, optimizer and metric from
    `config`, optionally wraps the model for sync-BN / AMP / distributed
    training, restores any pretrained checkpoint, and hands everything off to
    program.train().

    Args:
        config: merged YAML configuration (mutated in place below to size the
            recognition head(s) from the character set).
        device: paddle device returned by program.preprocess().
        logger: configured logger.
        vdl_writer: experiment logger (or None), forwarded to program.train().
        seed: RNG seed forwarded to the dataloaders.
    """
    # init dist environment
    if config["Global"]["distributed"]:
        dist.init_parallel_env()

    global_config = config["Global"]

    # build dataloader
    set_signal_handlers()
    train_dataloader = build_dataloader(config, "Train", device, logger, seed)
    if len(train_dataloader) == 0:
        logger.error(
            "No Images in train dataset, please ensure\n"
            + "\t1. The images num in the train label_file_list should be larger than or equal with batch size.\n"
            + "\t2. The annotation file and path in the configuration file are provided normally."
        )
        return

    if config["Eval"]:
        valid_dataloader = build_dataloader(config, "Eval", device, logger, seed)
    else:
        valid_dataloader = None
    step_pre_epoch = len(train_dataloader)

    # build post process
    post_process_class = build_post_process(config["PostProcess"], global_config)

    # build model
    # for rec algorithm: size the head(s) from the decoder's character set.
    # SAR/NRTR decoders reserve extra special symbols, hence the +/-2 and
    # +/-3 adjustments below.
    if hasattr(post_process_class, "character"):
        char_num = len(getattr(post_process_class, "character"))
        if config["Architecture"]["algorithm"] in [
            "Distillation",
        ]:  # distillation model
            for key in config["Architecture"]["Models"]:
                if (
                    config["Architecture"]["Models"][key]["Head"]["name"] == "MultiHead"
                ):  # for multi head
                    if config["PostProcess"]["name"] == "DistillationSARLabelDecode":
                        char_num = char_num - 2
                    if config["PostProcess"]["name"] == "DistillationNRTRLabelDecode":
                        char_num = char_num - 3
                    out_channels_list = {}
                    out_channels_list["CTCLabelDecode"] = char_num
                    # update SARLoss params
                    # NOTE: assumes the SAR/NRTR loss entry sits at index -1
                    # of loss_config_list, matching the config layout.
                    if (
                        list(config["Loss"]["loss_config_list"][-1].keys())[0]
                        == "DistillationSARLoss"
                    ):
                        config["Loss"]["loss_config_list"][-1]["DistillationSARLoss"][
                            "ignore_index"
                        ] = (char_num + 1)
                        out_channels_list["SARLabelDecode"] = char_num + 2
                    elif any(
                        "DistillationNRTRLoss" in d
                        for d in config["Loss"]["loss_config_list"]
                    ):
                        out_channels_list["NRTRLabelDecode"] = char_num + 3

                    config["Architecture"]["Models"][key]["Head"][
                        "out_channels_list"
                    ] = out_channels_list
                else:
                    config["Architecture"]["Models"][key]["Head"][
                        "out_channels"
                    ] = char_num
        elif config["Architecture"]["Head"]["name"] == "MultiHead":  # for multi head
            if config["PostProcess"]["name"] == "SARLabelDecode":
                char_num = char_num - 2
            if config["PostProcess"]["name"] == "NRTRLabelDecode":
                char_num = char_num - 3
            out_channels_list = {}
            out_channels_list["CTCLabelDecode"] = char_num
            # update SARLoss params
            # NOTE: here the SAR/NRTR loss entry is expected at index 1.
            if list(config["Loss"]["loss_config_list"][1].keys())[0] == "SARLoss":
                if config["Loss"]["loss_config_list"][1]["SARLoss"] is None:
                    config["Loss"]["loss_config_list"][1]["SARLoss"] = {
                        "ignore_index": char_num + 1
                    }
                else:
                    config["Loss"]["loss_config_list"][1]["SARLoss"]["ignore_index"] = (
                        char_num + 1
                    )
                out_channels_list["SARLabelDecode"] = char_num + 2
            elif list(config["Loss"]["loss_config_list"][1].keys())[0] == "NRTRLoss":
                out_channels_list["NRTRLabelDecode"] = char_num + 3
            config["Architecture"]["Head"]["out_channels_list"] = out_channels_list
        else:  # base rec model
            config["Architecture"]["Head"]["out_channels"] = char_num

        if config["PostProcess"]["name"] == "SARLabelDecode":  # for SAR model
            config["Loss"]["ignore_index"] = char_num - 1

    model = build_model(config["Architecture"])

    use_sync_bn = config["Global"].get("use_sync_bn", False)
    if use_sync_bn:
        model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        logger.info("convert_sync_batchnorm")

    model = apply_to_static(model, config, logger)

    # build loss
    loss_class = build_loss(config["Loss"])

    # build optim
    optimizer, lr_scheduler = build_optimizer(
        config["Optimizer"],
        epochs=config["Global"]["epoch_num"],
        step_each_epoch=len(train_dataloader),
        model=model,
    )

    # build metric
    eval_class = build_metric(config["Metric"])

    logger.info("train dataloader has {} iters".format(len(train_dataloader)))
    if valid_dataloader is not None:
        logger.info("valid dataloader has {} iters".format(len(valid_dataloader)))

    # automatic mixed precision setup (optional)
    use_amp = config["Global"].get("use_amp", False)
    amp_level = config["Global"].get("amp_level", "O2")
    amp_dtype = config["Global"].get("amp_dtype", "float16")
    amp_custom_black_list = config["Global"].get("amp_custom_black_list", [])
    amp_custom_white_list = config["Global"].get("amp_custom_white_list", [])
    if use_amp:
        AMP_RELATED_FLAGS_SETTING = {
            "FLAGS_max_inplace_grad_add": 8,
        }
        if paddle.is_compiled_with_cuda():
            AMP_RELATED_FLAGS_SETTING.update(
                {
                    "FLAGS_cudnn_batchnorm_spatial_persistent": 1,
                    "FLAGS_gemm_use_half_precision_compute_type": 0,
                }
            )
        paddle.set_flags(AMP_RELATED_FLAGS_SETTING)
        scale_loss = config["Global"].get("scale_loss", 1.0)
        use_dynamic_loss_scaling = config["Global"].get(
            "use_dynamic_loss_scaling", False
        )
        scaler = paddle.amp.GradScaler(
            init_loss_scaling=scale_loss,
            use_dynamic_loss_scaling=use_dynamic_loss_scaling,
        )
        if amp_level == "O2":
            model, optimizer = paddle.amp.decorate(
                models=model,
                optimizers=optimizer,
                level=amp_level,
                master_weight=True,
                dtype=amp_dtype,
            )
    else:
        scaler = None

    # load pretrain model
    # NOTE(review): unlike eval.py this indexes "model_type" directly and
    # will KeyError if the key is absent -- confirm configs always set it.
    pre_best_model_dict = load_model(
        config, model, optimizer, config["Architecture"]["model_type"]
    )

    if config["Global"]["distributed"]:
        model = paddle.DataParallel(model)
    # start train
    program.train(
        config,
        train_dataloader,
        valid_dataloader,
        device,
        model,
        loss_class,
        optimizer,
        lr_scheduler,
        post_process_class,
        eval_class,
        pre_best_model_dict,
        logger,
        step_pre_epoch,
        vdl_writer,
        scaler,
        amp_level,
        amp_custom_black_list,
        amp_custom_white_list,
        amp_dtype,
    )
+
+
def test_reader(config, device, logger):
    """Smoke-test the 'Train' dataloader: iterate every batch and log timing.

    Exceptions are caught and logged (best-effort probe) so a broken sample
    does not abort the run; the final count shows how far iteration got.
    """
    loader = build_dataloader(config, "Train", device, logger)
    import time

    starttime = time.time()
    count = 0
    try:
        for data in loader():
            count += 1
            # BUGFIX: the original wrapped this in `if count % 1 == 0`, which
            # is always true (leftover from a configurable log interval);
            # log every batch unconditionally -- behavior is unchanged.
            batch_time = time.time() - starttime
            starttime = time.time()
            logger.info(
                "reader: {}, {}, {}".format(count, len(data[0]), batch_time)
            )
    except Exception as e:
        logger.info(e)
    logger.info("finish reader: {}, Success!".format(count))
+
+
if __name__ == "__main__":
    # preprocess() parses the CLI / config file and prepares device + loggers.
    config, device, logger, vdl_writer = program.preprocess(is_train=True)
    # fall back to a fixed seed so runs are reproducible by default
    seed = config["Global"]["seed"] if "seed" in config["Global"] else 1024
    set_seed(seed)
    main(config, device, logger, vdl_writer, seed)
    # test_reader(config, device, logger)
| |