Scrapes a Russian-Kazakh translation website. Requires a terminal that can display UTF-8 characters.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

scrape.py 926 B

3 years ago
12345678910111213141516171819202122232425262728
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import sys, getopt
  4. import urllib.parse
  5. def main(argv):
  6. inputfile = ''
  7. inputfile = urllib.parse.quote(inputfile.encode('utf8'), ':/')
  8. numberfile = ''
  9. try:
  10. opts, args = getopt.getopt(argv,"hi:o:",["ifile=","ofile="])
  11. except getopt.GetoptError:
  12. print ('spotitube.py -i <Search Keyword> -o <Index>')
  13. sys.exit(2)
  14. for opt, arg in opts:
  15. if opt == '-h':
  16. print ('spotitube.py -i <Search Keyword> -o <Index>')
  17. sys.exit()
  18. elif opt in ("-i", "--ifile"):
  19. inputfile = arg
  20. elif opt in ("-o", "--ifile"):
  21. numberfile = arg
  22. html_content = requests.get(inputfile).text
  23. soup = BeautifulSoup(html_content, "lxml")
  24. print(soup.findAll("p",{"id":"dictionary_translate_article_translation"}))
  25. if __name__ == "__main__":
  26. main(sys.argv[1:])