Home All Groups Group Topic Archive Search About

Screen Scraping a Password Protected Site

Author
12 Apr 2007 7:38 PM
apondu
I'm trying to screen scrape a site that requires a password.

I am using C#.Net, i am new to this and with the information available
around on the internet i just put tht information into the code.

But still i am not able to achieve what i want to.

I have posted the code which i have written, along with the site and
the userid ans password

Can someone take a look at the code and help with the information on
where i am going wrong and guide me across with the correct procedure.
and help to perform screen scrapping of the password word protected
site.

Thnaks for the help

Regards,
Govardhan.

My Code :



public void getContent()
        {
            UTF8Encoding utf  = new UTF8Encoding();

            string url = "http://www.bloglines.com/login";

            Uri uri = new Uri(url);

            string userName = "apo***@gmail.com";

            string userPassword = "password123";

            int port = 80;

            string proxyUserName = "";

            string proxyPassword = "";

            string proxyName = "";

            CookieCollection Cookies = new CookieCollection();

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            request.Method = "POST";

            request.ContentType = "application/x-www-form-urlencoded";

        //    request.Credentials = new
NetworkCredential( userName,userPassword  );

            request.CookieContainer = new CookieContainer();

            request.AllowAutoRedirect = true;

            if (Cookies != null && Cookies.Count > 0)

                request.CookieContainer.Add(Cookies);

//  Code Changed to have post data

            string postData = "email=apo***@gmail.com&password=password123";
            byte[] postBytes = Encoding.UTF8.GetBytes (postData);

            Stream postStream = request.GetRequestStream();
            postStream.Write(postBytes, 0, postBytes.Length);
            postStream.Close();

//  End of  Code Changed to have post data

            HttpWebResponse response = (HttpWebResponse)request.GetResponse();

                        if (response.Cookies.Count > 0)
                        {
                            if (Cookies == null)

                            {
                                Cookies = response.Cookies;
                            }
                            else
                            {
                                // If we already have cookies update list

                                foreach (Cookie oRespCookie in    response.Cookies)
                                {
                                    bool bMatch = false;

                                    foreach(Cookie oReqCookie in Cookies)
                                    {
                                        if (oReqCookie.Name ==    oRespCookie.Name)
                                        {
                                            oReqCookie.Value =    oRespCookie.Name;

                                            bMatch = true;

                                            break;
                                        }
                                    }
                                    if (!bMatch)
                                        Cookies.Add(oRespCookie);
                                }

                            }
                        }  // End of response.Cookies.Count


            request.CookieContainer.Add(Cookies);

            request = (HttpWebRequest)WebRequest.Create("http://
www.bloglines.com/myblogs");

            response = (HttpWebResponse)request.GetResponse();

            Stream strm = response.GetResponseStream();

            System.Text.Encoding ec =
System.Text.Encoding.GetEncoding("utf-8");

            System.IO.StreamReader reader = new System.IO.StreamReader(strm,
ec);

            string str = reader.ReadToEnd();

            response.Close();
            strm.Close();
            reader.Close();

            FileStream fs = new FileStream("c:\
\q.htm",FileMode.Create,FileAccess.Write);

            StreamWriter sw = new StreamWriter(fs);

            sw.Write(str);

            sw.Close();
        }

AddThis Social Bookmark Button