...den Text einer HTML Datei extrahieren?

Autor: EddieShipman

Kategorie: Internet / LAN

// Very easy way to parse text from HTML using IHTMLDocument2.

uses
  
mshtml, ActiveX, ComObj;

{ Lets the user choose an HTML file via OpenDialog1, feeds its contents to an
  in-memory MSHTML document (IHTMLDocument2) and shows the resulting plain
  text (body.innerText) in Memo1. Does nothing if the dialog is cancelled. }
procedure TForm1.Button1Click(Sender: TObject);
var
  IDoc: IHTMLDocument2;
  Body: IHTMLElement;
  Strl: TStringList;
  sHTMLFile: string;
  v: Variant;
begin
  if not OpenDialog1.Execute then
    Exit;

  sHTMLFile := OpenDialog1.FileName;
  Strl := TStringList.Create;
  try
    Strl.LoadFromFile(sHTMLFile);
    IDoc := CreateComObject(CLASS_HTMLDocument) as IHTMLDocument2;
    try
      // Switch to design mode and pump messages until the document
      // reports 'complete' before writing into it.
      IDoc.designMode := 'on';
      while IDoc.readyState <> 'complete' do
        Application.ProcessMessages;

      // IHTMLDocument2.write takes a SAFEARRAY of variants; wrap the whole
      // file text in a one-element variant array.
      v := VarArrayCreate([0, 0], varVariant);
      v[0] := Strl.Text;
      IDoc.write(PSafeArray(System.TVarData(v).VArray));
      // NOTE(review): the output stream is never closed with IDoc.close;
      // the code relies on the designMode toggle below to reach 'complete'.
      // Confirm whether an explicit close is needed on the target MSHTML version.

      IDoc.designMode := 'off';
      while IDoc.readyState <> 'complete' do
        Application.ProcessMessages;

      // Guard against a missing body element instead of dereferencing nil.
      Body := IDoc.body;
      if Body <> nil then
        Memo1.Lines.Text := Body.innerText
      else
        Memo1.Lines.Clear;
    finally
      // Drop the COM references explicitly.
      Body := nil;
      IDoc := nil;
    end;
  finally
    Strl.Free;
  end;
end;

 

printed from
www.swissdelphicenter.ch
developers knowledge base