commit b698570ff9d1c6a4d850973b94d1489b5056c427
Author: root
Date:   Tue Dec 29 09:13:51 2020 +0600

    Initial commit

diff --git a/GOOGLE.pyw b/GOOGLE.pyw
new file mode 100644
index 0000000..ba3b595
--- /dev/null
+++ b/GOOGLE.pyw
@@ -0,0 +1,45 @@
+"""Fetch a URL and print the sozdik.kz translation paragraphs found in it."""
+from bs4 import BeautifulSoup
+import requests
+import sys, getopt
+import urllib.parse
+
+USAGE = 'GOOGLE.pyw -i <url> [-o <number>]'
+
+
+def main(argv):
+    """Download the page named by -i/--ifile and print its translation nodes.
+
+    argv is the argument list without the program name. Exits with status
+    2 on an unknown option or a missing URL, and with 0 after -h.
+    """
+    inputfile = ''
+    numberfile = ''
+    try:
+        opts, args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
+    except getopt.GetoptError:
+        print(USAGE)
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print(USAGE)
+            sys.exit()
+        elif opt in ("-i", "--ifile"):
+            inputfile = arg
+        elif opt in ("-o", "--ofile"):
+            # Parsed for command-line compatibility; nothing reads it yet.
+            numberfile = arg
+    if not inputfile:
+        print(USAGE)
+        sys.exit(2)
+    # Quote only once the URL is known: sozdik.sh passes the typed word
+    # through unencoded. '%' stays safe so an already-encoded URL is not
+    # encoded a second time.
+    inputfile = urllib.parse.quote(inputfile, safe=":/?&=#%")
+    html_content = requests.get(inputfile, timeout=10).text
+    soup = BeautifulSoup(html_content, "lxml")
+    print(soup.find_all("p", {"id": "dictionary_translate_article_translation"}))
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/dependencies.sh b/dependencies.sh
new file mode 100755
index 0000000..8418c04
--- /dev/null
+++ b/dependencies.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# tkinter ships with Python and is not installable from PyPI, so it is not listed.
+pip3 install requests bs4 lxml html2text
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..c548196
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,8 @@
+# Sozdikscraper
+Sozdikscraper is a tool that allows you to scrape sozdik.kz for translations. For now, it only works from
+Russian to Kazakh.
+# Installation
+Just run **./dependencies.sh** or install python, pip, requests, bs4, lxml and html2text
+# Usage
+Step 1: Launch the program ``` ./sozdik.sh ```
+Step 2: Translate!
diff --git a/sozdik.sh b/sozdik.sh
new file mode 100755
index 0000000..371141d
--- /dev/null
+++ b/sozdik.sh
@@ -0,0 +1,9 @@
+#! /bin/bash
+agent="Mozilla/5.0 (Windows NT 10.0; x86_64; rv:67.0) Gecko/20100101 Firefox/67.0"
+# Stop at end of input (Ctrl-D) instead of looping forever on a failed read.
+while read -r -p "Translate what word: " word
+do
+    # An empty answer would request ".../kk//"; ask again instead.
+    [ -n "$word" ] || continue
+    python3 GOOGLE.pyw -i "https://sozdik.kz/ru/dictionary/translate/ru/kk/${word}/" | html2text
+done
diff --git a/theMostPoupularInNews b/theMostPoupularInNews
new file mode 100644
index 0000000..e69de29