1+ from  bs4  import  BeautifulSoup , Tag 
2+ from  docutils  import  nodes 
3+ import  os .path 
4+ 
# A raw HTML fragment is converted only when its first tag is one of these.
known_start_tags = [
    'p',
    'img',
    'table',
]

# A raw HTML fragment whose first tag has this name is replaced by a
# docutils comment node instead of being converted.
hidden_tag = 'hidden'
7+ 
def html2Docutils(app, doctree):
    """Replace raw HTML nodes in *doctree* with equivalent docutils nodes.

    Does nothing unless the ``sphinx_md_processRaw`` config value is truthy.
    Each ``raw`` node is parsed with BeautifulSoup and dispatched on the name
    of the first tag found:

    * a tag listed in ``known_start_tags`` is converted by ``convertHTML`` and
      the raw node is replaced by the converted nodes;
    * a ``hidden_tag`` tag is replaced by a comment node containing the text
      ``"hidden"``;
    * anything else (including fragments without any tag) is left untouched.

    :param app: Sphinx application; ``app.env.config`` is read here and the
        object is passed through to ``convertHTML``.
    :param doctree: document tree to rewrite in place; ``doctree['source']``
        is used as the path of the document being processed.
    :returns: ``None``.

    NOTE(review): the ``(app, doctree)`` signature suggests this is connected
    to a Sphinx doctree event -- confirm where it is registered.
    """
    if not app.env.config.sphinx_md_processRaw:
        return
    filepath = doctree['source']
    htmlCounter = 0
    # Snapshot the raw nodes first: the loop replaces nodes in the tree, and
    # mutating a tree while it is still being traversed can skip or revisit
    # nodes.
    for node in list(doctree.traverse(nodes.raw)):
        soup = BeautifulSoup(node.astext(), features="html.parser")
        first_tag = soup.find()
        if first_tag is None:
            # No tag at all in this raw node; leave it as it is.
            continue
        if first_tag.name in known_start_tags:
            # The container is only a scratch parent for the converter; its
            # children (not the container itself) are spliced into the tree.
            div = nodes.container()
            div['id'] = 'html-content-' + str(htmlCounter)
            htmlCounter += 1
            convertHTML(soup, div, app, filepath)
            node.parent.replace(node, div.children)
        elif first_tag.name == hidden_tag:
            hidden_comment = nodes.comment()
            hidden_comment.append(nodes.Text("hidden"))
            node.parent.replace(node, hidden_comment)
34+ 
# HTML tags that map one-to-one onto a docutils node class.  Values are
# attribute names resolved on ``docutils.nodes`` when the tag is converted.
_SIMPLE_TAG_NODES = {
    "p": "paragraph",
    "thead": "thead",
    "tbody": "tbody",
    "tr": "row",
    "sup": "superscript",
    "code": "literal",
}


def _append_table(tag, parent, app, filepath):
    """Append a table/title/tgroup skeleton for *tag*; return the tgroup."""
    table_ids = app.env.config.sphinx_md_tableIDs
    # The counter is kept per document, so it must be keyed by the document
    # path itself (not by a fixed string) for the ids to advance.
    if filepath in table_ids:
        table_ids[filepath] += 1
    else:
        table_ids[filepath] = 0

    table = nodes.table()
    table['ids'].append("id" + str(table_ids[filepath]))

    tgroup = nodes.tgroup()
    ncols = getNumCols(tag)
    tgroup['cols'] = ncols
    for _ in range(ncols):
        colspec = nodes.colspec()
        colspec["colwidth"] = 1
        tgroup += colspec

    table += nodes.title()
    table += tgroup
    parent += table
    # Rows (thead/tbody) found below the table are attached to the tgroup.
    return tgroup


def _append_image(tag, parent, app, filepath):
    """Append an image node built from the attributes of *tag*; return it."""
    node = nodes.image()
    attrs = tag.attrs
    # Sphinx's image handling reads ``candidates`` as a mapping: '*' for a
    # local file, '?' for a remote URI.
    candidates = {'*': ''}

    if "alt" in attrs:
        node["alt"] = attrs['alt']

    if "src" in attrs:
        src = attrs['src']
        if "://" in src:
            # Remote image: keep the URI untouched.
            node["uri"] = src
            candidates = {'?': src}
        else:
            srcdir = app.env.srcdir
            doc_relpath = os.path.relpath(filepath, srcdir)
            docfilename = os.path.splitext(doc_relpath)[0]
            # Image paths are resolved relative to the directory of the
            # document, expressed relative to the source directory.
            imgPath = os.path.join(os.path.dirname(doc_relpath), src)
            node["uri"] = imgPath
            candidates = {'*': imgPath}
            # imgPath is relative to srcdir, so anchor the existence check
            # there instead of the current working directory.
            if os.path.isfile(os.path.join(srcdir, imgPath)):
                if imgPath not in app.env.images:
                    imageFileName = os.path.basename(imgPath)
                    app.env.images[imgPath] = ({docfilename}, imageFileName)

    if "width" in attrs:
        # A bare number is given an explicit pixel unit.
        suffix = 'px' if attrs['width'].isnumeric() else ''
        node["width"] = attrs['width'] + suffix
    if "height" in attrs:
        node["height"] = attrs['height']

    node["candidates"] = candidates
    parent += node
    return node


def _convert_tag(tag, parent, app, filepath):
    """Append the docutils node for one HTML *tag* to *parent*.

    Returns the node that should receive the converted children of *tag*,
    or ``None`` when the tag is not supported.
    """
    name = tag.name
    if name == "table":
        return _append_table(tag, parent, app, filepath)
    if name == "img":
        return _append_image(tag, parent, app, filepath)
    if name in ("th", "td"):
        # Cell content goes into a paragraph wrapped by the entry.
        entry = nodes.entry()
        paragraph = nodes.paragraph()
        entry += paragraph
        parent += entry
        return paragraph
    if name == "a":
        href = tag.attrs.get('href')
        if href is None:
            # An anchor without a target cannot be a reference node; keep
            # its content in a neutral inline container instead of failing.
            node = nodes.inline()
        else:
            node = nodes.reference()
            node["refuri"] = href
        parent += node
        return node
    node_class_name = _SIMPLE_TAG_NODES.get(name)
    if node_class_name is None:
        return None
    node = getattr(nodes, node_class_name)()
    parent += node
    return node


def convertHTML(soup, parent, app, filepath):
    """Recursively convert the children of *soup* into docutils nodes.

    Supported tags are ``table``, ``thead``, ``tbody``, ``tr``, ``th``,
    ``td``, ``p``, ``img``, ``sup``, ``a`` and ``code``.  Children without a
    tag name (text) become ``Text`` nodes.  Any other tag is skipped together
    with everything inside it.

    :param soup: parsed HTML element whose ``children`` are converted;
        objects without a ``children`` attribute are ignored.
    :param parent: docutils node that receives the converted nodes.
    :param app: Sphinx application; ``app.env`` supplies ``srcdir``,
        ``images`` and the ``sphinx_md_tableIDs`` config mapping.
    :param filepath: path of the document being processed; used to number
        tables per document and to resolve relative image paths.
    :returns: ``None``; *parent* is modified in place.
    """
    if not hasattr(soup, "children"):
        return
    for child in soup.children:
        node = None
        if getattr(child, "name", None) is not None:
            node = _convert_tag(child, parent, app, filepath)
        elif isinstance(parent, nodes.Node):
            node = nodes.Text(child)
            parent += node
        if node is not None:
            convertHTML(child, node, app, filepath)
120+ 
def removeHTMLAttributes(soup, tagName):
    """Delete every attribute from all *tagName* tags in *soup*.

    :param soup: object providing ``find_all``.
    :param tagName: name of the tags to strip.
    :returns: the same *soup* object, modified in place.
    """
    for element in soup.find_all(tagName):
        # Copy the attribute names before deleting so the mapping is not
        # modified while it is being iterated.
        for attr_name in list(element.attrs):
            del element[attr_name]
    return soup
130+ 
def replaceTag(soup, oldTag, newTag, delAttrs=True):
    """Rename all *oldTag* tags in *soup* to *newTag*.

    :param soup: object providing ``find_all``.
    :param oldTag: name of the tags to rename.
    :param newTag: new tag name.
    :param delAttrs: when true (the default), every attribute of a renamed
        tag is deleted as well.
    :returns: the same *soup* object, modified in place.
    """
    for element in soup.find_all(oldTag):
        element.name = newTag
        if not delAttrs:
            continue
        # Copy the attribute names before deleting so the mapping is not
        # modified while it is being iterated.
        for attr_name in list(element.attrs):
            del element[attr_name]
    return soup
142+ 
def fixImages(soup):
    """Rewrite every ``img`` tag in *soup* as an ``image`` tag.

    For each image the ``src`` attribute is moved to ``href`` and the ``alt``
    attribute is moved into a new ``alt`` child tag.  An attribute that is
    absent is simply skipped, so images without ``src`` or ``alt`` no longer
    raise ``KeyError``.

    :param soup: object providing ``find_all`` and ``new_tag``.
    :returns: the same *soup* object, modified in place.
    """
    for imgTag in soup.find_all('img'):
        imgTag.name = "image"
        # Membership is tested on ``attrs``: the tag object itself is only
        # indexed here, never used as a container of attribute names.
        if 'src' in imgTag.attrs:
            imgTag['href'] = imgTag['src']
            del imgTag['src']
        if 'alt' in imgTag.attrs:
            altTag = soup.new_tag("alt")
            altTag.string = imgTag['alt']
            del imgTag['alt']
            imgTag.append(altTag)
    return soup
154+ 
155+ 
def addTGroup(soup):
    """Turn every ``table`` tag in *soup* into a ``tgroup`` wrapped in a table.

    Each original ``table`` tag is renamed to ``tgroup``, gets a ``cols``
    attribute, loses its ``class`` attribute, and is then wrapped in a newly
    created ``table`` tag.

    The column count is taken from each table individually, so a document
    with several tables no longer stamps the total of the whole document on
    every one of them.

    :param soup: object providing ``find_all`` and ``new_tag``.
    :returns: the same *soup* object, modified in place.
    """
    # find_all is evaluated once, before any wrapper is created, so the new
    # wrapper tables are not part of this loop.
    for tableTag in soup.find_all('table'):
        numCols = getNumCols(tableTag)
        tableTag.name = 'tgroup'
        tableTag['cols'] = numCols
        del tableTag['class']
        wrap(tableTag, soup.new_tag("table"))
    return soup
165+ 
def wrap(to_wrap, wrap_in):
    """Put *wrap_in* where *to_wrap* was, then nest the old content inside it.

    *to_wrap* is swapped out via ``replace_with`` and whatever that call
    returns is appended to *wrap_in*.

    :param to_wrap: element to be wrapped.
    :param wrap_in: element that takes its place and becomes the wrapper.
    :returns: ``None``.
    """
    wrap_in.append(to_wrap.replace_with(wrap_in))
169+ 
def getNumCols(soup):
    """Return how many ``th`` tags ``soup.find_all`` reports.

    The number of header cells is used by callers as the column count.

    :param soup: object providing ``find_all``.
    :returns: the count as an ``int``.
    """
    return len(soup.find_all('th'))
0 commit comments