Monday, March 7, 2016

Python: Using Selenium to logon to Twitter

Recipe 1


Automatically log on to Twitter.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import os
import sys
import codecs
import pprint

import unittest
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import selenium.webdriver.support.ui as ui

import logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

class PythonOrgSearch(unittest.TestCase):
    """Log in to Twitter through the web UI with Selenium/Firefox."""

    def setUp(self):
        # Fresh browser instance per test.
        self.driver = webdriver.Firefox()

    def test_search_in_python_org(self):
        """Open twitter.com, fill in the login form, and submit it."""
        driver = self.driver
        driver.get("https://twitter.com/")
        driver.maximize_window()

        # Locators match the 2016-era Twitter login form markup; they will
        # need updating if the page changes.
        username = driver.find_element_by_class_name("js-username-field")
        password = driver.find_element_by_class_name("js-password-field")

        username.send_keys("myusername")
        password.send_keys("mypassword")

        # Actually wait for the submit button before clicking — the
        # original created a WebDriverWait but never called .until().
        wait = ui.WebDriverWait(driver, 5)
        submit = wait.until(
            lambda d: d.find_element_by_css_selector(
                "button.submit.btn.primary-btn"))
        submit.click()

    def tearDown(self):
        # Release the browser. The original left Firefox running: the
        # close() call was commented out and replaced by a debug print.
        self.driver.quit()

# Run the test case above when this file is executed as a script.
if __name__ == "__main__":
 unittest.main()



Recipe 2


Log on to Twitter with Firefox (headlessly) and scrape followers from a user profile:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from pyvirtualdisplay import Display

def correct_url(url):
    """Return *url* unchanged if it already carries an HTTP(S) scheme,
    otherwise prefix it with "http://"."""
    has_scheme = url.startswith("http://") or url.startswith("https://")
    return url if has_scheme else "http://" + url

def scrollDown(browser, numberOfScrollDowns):
    """Send PAGE_DOWN to the page body *numberOfScrollDowns* times.

    Returns the same *browser* instance so calls can be chained.
    """
    body = browser.find_element_by_tag_name("body")
    # The original condition was ``>= 0``, which scrolled one extra time
    # (and even once for a negative count); ``> 0`` performs exactly the
    # requested number of scrolls.
    while numberOfScrollDowns > 0:
        body.send_keys(Keys.PAGE_DOWN)
        numberOfScrollDowns -= 1
        # Progress report every 10 scrolls. print() with a single argument
        # behaves identically on Python 2 and Python 3.
        if numberOfScrollDowns % 10 == 0:
            print('remaining scroll downs ... {}'.format(numberOfScrollDowns))
    return browser

def crawl_url(url, run_headless=True):
    """Log in to Twitter, scroll a followers page, and save the follower
    handles to ``followers.dat`` (one handle per line).

    Parameters:
        url: followers page to crawl; an http:// scheme is added if missing.
        run_headless: when True, run Firefox inside a virtual display.
    """
    display = None
    if run_headless:
        display = Display(visible=0, size=(1024, 768))
        display.start()

    url = correct_url(url)
    browser = webdriver.Firefox()
    try:
        browser.get(url)

        # 2016-era Twitter login form locators.
        username = browser.find_element_by_class_name("js-username-field")
        password = browser.find_element_by_class_name("js-password-field")
        username.send_keys("username")
        password.send_keys("password")
        browser.find_element_by_css_selector("button.submit.btn.primary-btn").click()

        target_set = set()
        while True:
            browser = scrollDown(browser, 500)

            all_targets = browser.find_elements_by_class_name("u-linkComplex-target")
            new_targets = set(a_target.text for a_target in all_targets)
            # Stop once a full scroll pass yields nothing new. The original
            # looped forever, so browser.quit() below was unreachable.
            if new_targets <= target_set:
                break
            target_set |= new_targets

            # Rewrite the snapshot each pass; ``with`` guarantees the file
            # is closed even if a write fails.
            with open('followers.dat', 'w') as fo:
                for target in target_set:
                    fo.write(target + '\n')
            print('wrote {} to file'.format(len(target_set)))
    finally:
        # Always release the browser and the virtual display — the original
        # never stopped the display at all.
        browser.quit()
        if display is not None:
            display.stop()

if __name__ == '__main__':
    # Crawl the followers page of the example account.
    crawl_url("https://twitter.com/username/followers/")



References

  1. [TidbitsOfProgramming] Crawling Websites that Loads Contents

5 comments:

  1. Usually I do not read post on blogs, but I would like to say that this write-up very forced me to try and do it! Your writing style has been surprised me. Thanks, very nice article.

    Selenium Training in Chennai

    ReplyDelete
  2. Thanks for this tutorial, please feel to try the scripts out that had your work referenced.
    https://github.com/mpfarmer/TwSearchEnabled

    ReplyDelete
  3. Nice information thank you,if you want more information please visit our link selenium Online course

    ReplyDelete
  4. This is an awesome post.Really very informative and creative contents. These concept is a good way to enhance the knowledge.
    I like it and help me to development very well.Thank you for this brief explanation and very nice information.Well, got a good knowledge.
    Java training in Indira nagar
    Java training in Rajaji nagar
    Java training in Marathahalli
    Java training in Btm layout
    Java training in Marathahalli

    ReplyDelete