Recipe 1
Automatically log on to Twitter:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import logging
import unittest

from selenium import webdriver
import selenium.webdriver.support.ui as ui

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


class TwitterLoginTest(unittest.TestCase):

    def setUp(self):
        self.driver = webdriver.Firefox()

    def test_login_to_twitter(self):
        driver = self.driver
        driver.get("https://twitter.com/")
        driver.maximize_window()
        # Locate the login fields by the class names Twitter used at the
        # time of writing; these break whenever the page is redesigned.
        username = driver.find_element_by_class_name("js-username-field")
        password = driver.find_element_by_class_name("js-password-field")
        username.send_keys("myusername")
        password.send_keys("mypassword")
        wait = ui.WebDriverWait(driver, 5)  # created but never used; see the sketch below
        driver.find_element_by_css_selector("button.submit.btn.primary-btn").click()

    def tearDown(self):
        self.driver.quit()


if __name__ == "__main__":
    unittest.main()
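Note that Recipe 1 creates a WebDriverWait but never calls it, so the final click can still fire before the submit button is rendered. Below is a minimal sketch of how that wait could actually gate the click, using Selenium's expected_conditions helpers; the click_when_ready name and the default selector argument are illustrative assumptions, and the selector itself is only valid for the Twitter markup of the time.

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import selenium.webdriver.support.ui as ui

def click_when_ready(driver, css_selector="button.submit.btn.primary-btn", timeout=5):
    # Block for up to `timeout` seconds until the element matched by
    # `css_selector` is visible and enabled, then click it. Raises
    # selenium.common.exceptions.TimeoutException if it never appears.
    wait = ui.WebDriverWait(driver, timeout)
    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector)))
    button.click()

With this helper, the last two lines of test_login_to_twitter collapse into click_when_ready(driver).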
Recipe 2
Log on to Twitter with Firefox running headlessly and scrape followers from a user profile:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from pyvirtualdisplay import Display


def correct_url(url):
    if not url.startswith("http://") and not url.startswith("https://"):
        url = "http://" + url
    return url


def scrollDown(browser, numberOfScrollDowns):
    # Page the window down repeatedly so Twitter's infinite scroll
    # keeps loading more followers.
    body = browser.find_element_by_tag_name("body")
    while numberOfScrollDowns >= 0:
        body.send_keys(Keys.PAGE_DOWN)
        numberOfScrollDowns -= 1
        if numberOfScrollDowns % 10 == 0:
            print('remaining scroll downs ... {}'.format(numberOfScrollDowns))
    return browser


def crawl_url(url, run_headless=True):
    display = None
    if run_headless:
        # Render Firefox into a virtual X display so no window appears.
        display = Display(visible=0, size=(1024, 768))
        display.start()
    url = correct_url(url)
    browser = webdriver.Firefox()
    browser.get(url)
    # Log in first: the full follower list is only shown to
    # authenticated users.
    username = browser.find_element_by_class_name("js-username-field")
    password = browser.find_element_by_class_name("js-password-field")
    username.send_keys("username")
    password.send_keys("password")
    browser.find_element_by_css_selector("button.submit.btn.primary-btn").click()
    # Scroll to load followers, then collect the @handles on the page.
    target_set = set()
    browser = scrollDown(browser, 500)
    all_targets = browser.find_elements_by_class_name("u-linkComplex-target")
    for a_target in all_targets:
        target_set.add(a_target.text)
    with open('followers.dat', 'w') as fo:
        for target in target_set:
            fo.write(target + '\n')
    print('wrote {} to file'.format(len(target_set)))
    browser.quit()
    if display is not None:
        display.stop()


if __name__ == '__main__':
    url = "https://twitter.com/username/followers/"
    crawl_url(url)
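Scrolling a fixed 500 times is a blunt instrument: short follower lists waste several minutes of scrolling, and very long ones get truncated. A sketch of an alternative, assuming the u-linkComplex-target class still marks follower handles, keeps scrolling only while each pass loads new handles. The scroll_until_exhausted name and the pause and max_rounds parameters are illustrative, not part of the recipe above.

import time

def scroll_until_exhausted(browser, pause=2.0, max_rounds=200):
    # Scroll to the bottom repeatedly, stopping once a scroll no longer
    # loads any new follower handles (or after max_rounds as a safety cap).
    seen = 0
    for _ in range(max_rounds):
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)  # give the infinite-scroll request time to finish
        count = len(browser.find_elements_by_class_name("u-linkComplex-target"))
        if count == seen:
            break  # nothing new loaded; assume we reached the end of the list
        seen = count
    return browser

On Firefox 55 or newer, the pyvirtualdisplay dependency can also be dropped in favor of the browser's own headless mode (passing -headless via FirefoxOptions), though the recipe above predates that flag.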