/* parse_html */
/*
 * Reads an HTML file into the stem lines. and splits it, through
 * parse_html(), into the stem elems. holding one tag or one run of text
 * per entry.
 */

/* main procedure */
n_in = 'c:\temp\test.html'
o_in = .Stream~New(n_in)              -- Create a stream object for input
i = 0
Do line Over o_in
  i = i + 1
  lines.i = line
  /* the pre-processing of the html file is here */
End -- over
lines.0 = i
o_in~close                            -- every line is in lines. now; release the file

elems. = parse_html()                 -- from stem lines. to stem elems. One tag per line.
Drop lines.
Do i = 1 To elems.0
  line = elems.i
  /* the post-processing of the splitted html file is here */
End i
Drop elems.
Exit

/*
 * parse_html: split the lines of the exposed stem lines. into elements.
 *
 * Input   lines.0 = number of lines, lines.1 .. lines.n = the raw lines.
 *         The stem is modified: a tag left open at the end of a line is
 *         moved to the front of the following line.
 * Returns a stem whose tail 0 is the element count and whose tails 1..n
 *         are the elements: either '<name>' / '<name attributes>' with the
 *         tag name lower-cased by lower(), or a run of text found between
 *         tags (not stripped).
 *
 * Tabs are replaced by blanks and blank lines are skipped.
 * Text that cannot be matched to a complete '<...>' pair (trailing text,
 * a stray '>', an unterminated tag on the very last line) is kept as a
 * text element instead of being dropped.
 */
parse_html: procedure expose lines.
  comment_attr = 0                    -- if 1, the attributes for the tag are put as comment
  elems. = ''
  i = 0

  Do j = 1 To lines.0
    text = Strip(Changestr('09'x, lines.j, ' '))     -- tab to space
    If text = '' Then Iterate

    /* One or more tags, the last one does not end on the current line:
       put the non ending tag on the following line. On the last line
       there is no following line, so the fragment stays here and ends
       up as text below. */
    last_open = Lastpos('<', text)
    If (last_open > Lastpos('>', text)) & (j < lines.0) Then Do
      k = j + 1
      lines.k = Substr(text, last_open) Strip(lines.k)
      text = Left(text, last_open - 1)               -- text and complete tags
    End

    /* Consume the line from left to right: text, tag, text, tag, ... */
    Do While text \= ''
      open_pos = Pos('<', text)
      If open_pos = 0 Then close_pos = 0             -- Pos() rejects a start position of 0
      Else close_pos = Pos('>', text, open_pos)

      If close_pos = 0 Then Do
        /* no complete tag left: whatever remains is plain text */
        i = i + 1
        elems.i = text
        Leave
      End

      /* '<' matches at open_pos and '>' at close_pos, as computed above */
      Parse Var text deb '<' tag '>' text
      If deb \= '' Then Do
        i = i + 1
        elems.i = deb
      End

      Parse Var tag elem attrib
      elem = lower(elem)
      i = i + 1
      Select
        When attrib = '' Then elems.i = '<'elem'>'
        /* NOTE(review): the literal appended here is empty, so with
           comment_attr = 1 the attributes are dropped, not commented. */
        When comment_attr Then elems.i = '<'elem'>' ''
        Otherwise elems.i = '<'elem attrib'>'
      End
    End -- while
  End j

  elems.0 = i
  return elems.
-- end of parse_html

/*
 * lower: return the first argument with the letters A-Z folded to a-z.
 * Every other character is returned unchanged.
 *
 * TRANSLATE takes the output table first and the input table second:
 * each character found in the input table is replaced by the character
 * at the same position in the output table.
 */
lower:
  return translate(arg(1), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')

/*---------------------------- requires ------------------------------*/
::requires "OODPLAIN.cls"