/* parse_html */
/* main procedure:                                                     */
/*   1. read the input file into the stem lines. (lines.0 = count)     */
/*   2. let parse_html() split it into the stem elems.                 */
/*   3. walk elems. for post-processing                                */
n_in = 'c:\temp\test.html'
o_in = .Stream~New(n_in) -- Create a stream object for input
i = 0
Do line Over o_in
  i = i + 1; lines.i = line
  /* the pre-processing of the html file is here */
End -- over
o_in~close -- everything is in lines. now; release the file handle
lines.0 = i
elems. = parse_html() -- from stem lines. to stem elems. One tag per line.
Drop lines. -- the raw input is no longer needed
Do i = 1 To elems.0
  line = elems.i
  /* the post-processing of the split html file is here */
End i
Drop elems.
Exit
parse_html: procedure expose lines.
/*--------------------------------------------------------------------*/
/* Split the HTML text held in the exposed stem lines. (lines.0 is    */
/* the line count) into a new stem with one element per entry: either */
/* a tag ('<name attributes>', tag name lower-cased) or a run of text.*/
/* Returns the stem; its tail 0 holds the number of entries.          */
/*                                                                    */
/* Side effect: lines. is modified while parsing (tabs replaced,      */
/* lines stripped, an unfinished tag is moved to the following line). */
/*                                                                    */
/* Nothing from the input is silently discarded: text next to an      */
/* unfinished tag, text after the last tag of a line, lines whose     */
/* '<' and '>' counts do not match, and a tag left unfinished on the  */
/* very last line are all emitted as entries.                         */
/*--------------------------------------------------------------------*/
comment_attr = 0 -- if 1, the attributes for the tag are put as comment
elems. = ''; i = 0
Do j = 1 To lines.0
  dangling = '' -- unfinished tag that has no following line to move to
  lines.j = Strip(Changestr('09'x,lines.j,' ')) -- tab to space, then trim
  If lines.j = '' Then Iterate
  /* no tags: the whole line is one text entry */
  If (Pos('<',lines.j) = 0) & (Pos('>',lines.j) = 0) Then Do
    i = i + 1; elems.i = lines.j
    Iterate
  End
  /* the last tag does not end on the current line */
  If (Pos('>',lines.j) = 0) | (Lastpos('<',lines.j) > Lastpos('>',lines.j)) Then Do
    spos = Lastpos('<',lines.j) -- always > 0 here, see the condition above
    rest = Substr(lines.j,spos + 1) -- what follows the unfinished '<'
    k = j + 1
    If k <= lines.0 Then
      lines.k = '<'rest Strip(lines.k) -- put non ending tag on the following line
    Else
      dangling = '<'rest -- last line: keep the fragment instead of losing it
    lines.j = Left(lines.j,spos - 1) -- text and complete tags
  End
  Select
    When lines.j = '' Then Nop -- the line held nothing but the unfinished tag
    /* text and complete tags */
    When (Pos('<',lines.j) > 0) & (Countstr('<',lines.j) = Countstr('>',lines.j)) Then
      Do While lines.j \= ''
        If Pos('<',lines.j) = 0 Then Do
          /* text after the last tag: a text entry, not an empty '<>' tag */
          i = i + 1; elems.i = lines.j
          Leave
        End
        Parse Var lines.j deb '<' tag '>' rest
        If deb \= '' Then Do; i = i + 1; elems.i = deb; End
        Parse Var tag elem attrib
        elem = lower(elem)
        i = i + 1
        Select
          When attrib = '' Then elems.i = '<'elem'>'
          When comment_attr Then elems.i = '<'elem'>' '<!--' attrib '-->'
          Otherwise elems.i = '<'elem attrib'>'
        End
        lines.j = rest
      End
    Otherwise
      /* text left beside an unfinished tag, or unbalanced '<' / '>':   */
      /* keep the line verbatim rather than dropping it                 */
      i = i + 1; elems.i = lines.j
  End
  If dangling \== '' Then Do; i = i + 1; elems.i = dangling; End
End j
elems.0 = i
return elems. -- parse_html
lower: Procedure
/* Return the first argument with the letters A-Z folded to a-z;      */
/* every other character is passed through unchanged.                 */
Parse Arg text
from_table = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
to_table = 'abcdefghijklmnopqrstuvwxyz'
Return Translate(text,to_table,from_table)
/*---------------------------- requires ------------------------------*/
::requires "OODPLAIN.cls"