convert htmlelement to htmldocument
Hi All.,
am doing a task of scraping html table contents from web pages and store in to DB.
using mshtml, web broser control...
in can handle the spurce page in to html document and can search for element by tagname 'Table'. but the source html contains multiple Table tags which is difficult to handle. for this i need to search source html recursively.
for this my code:
private string getElementByTagName(string pTagName, string pLookupValue)
{
mshtml.IHTMLElementCollection DocAll = htmlDocument.getElementsByTagName(pTagName);
foreach (mshtml.IHTMLElement DocAllElem in DocAll)
{
string str = DocAllElem.innerHTML;
if (DocAllElem.innerHTML.IndexOf(pLookupValue) > 0)
{
lstrResult = getElementByTagNameRecursive(DocAllElem, pTagName, pLookupValue).innerHTML.ToString().Trim();
//lstrResult = DocAllElem.innerHTML;
//lstrResult = DocAllElem.innerText;
break;
}
}
return lstrResult;
}
-----------------------------------------------------------------
private mshtml.IHTMLElement getElementByTagNameRecursive(mshtml.IHTMLElement pHtmlElement, string pTagName, string pLookupValue)
{
HTMLDocument myDoc = new HTMLDocument();
//mshtml.HTMLDocument myDoc = (mshtml.HTMLDocument) pHtmlElement;
myDoc.body.innerHTML = pHtmlElement.ToString();
//(**** here am converting HTMLElement to HTMLDOcument).....
//(i couldn't make this...is this possible?.. any other way to handle this issue....)
mshtml.IHTMLElementCollection DocAll = myDoc.getElementsByTagName(pTagName);
foreach (mshtml.IHTMLElement DocAllElem in DocAll)
{
string str = DocAllElem.innerHTML;
//Console.WriteLine(str);
if (DocAllElem.innerHTML.IndexOf(pLookupValue) > 0)
{
lstrResult = getElementByTagNameRecursive(DocAllElem, pTagName, pLookupValue);
break;
}
}
return lstrResult;
}
i appriciate if you spend thier valuable time and any help....
thanks for u r time.,
Rgds,
Prasad