0
Reply

Help Reading HTML!

Steve

Steve

Jul 8 2010 9:43 PM
1.8k
Hi I am trying to read some HTML of a webpage. The webpage is http://marketplace.xbox.com/en-US/games/catalog.aspx?d=0&r=-1&g=-1&mt=0&ot=0&st=Call+of+Duty&sb=2&rl=0&p=1. What i am trying to do, is get the inner text for each instance of the class name XbcMktGameItemDetailTitle . The result i'm getting is only Call of Duty 2 and I should get Call of Duty2Call of Duty 3....Please ask questions if confusing.

 Here is my code:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Web;
using mshtml;
using System.Net;

namespace WindowsFormsApplication1
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }


        private void button1_Click(object sender, EventArgs e)
        {
            string html;
            using (WebClient wc = new WebClient())
            {
                //Set the game's title ID page as the page's source that we're looking at
                html = wc.DownloadString("http://marketplace.xbox.com/en-US/games/catalog.aspx?d=0&r=-1&g=-1&mt=0&ot=0&st=Call+of+Duty&sb=2&rl=0&p=1");
            }
            object[] oPageText = { html };
            HTMLDocument doc = new HTMLDocument();
            IHTMLDocument2 doc2 = (IHTMLDocument2)doc;
            doc2.write(oPageText);
            foreach (IHTMLElement div in doc.getElementsByTagName("p"))
            {
                //Get the game image
                if (div.className == "XbcMktGameItemDetailTitle")
                {
                    string imagePath = div.innerHTML.Remove(0, 0);
                    imagePath = imagePath.Remove(imagePath.Length - 0, 0);
                    label1.Text = label1.Text + imagePath;
                    break;
                }
            }
        }
    }
}