| @@ -1,14 +1,15 @@ | |||||
| import tkinter | import tkinter | ||||
| from tkinter import * | from tkinter import * | ||||
| from os.path import expanduser | from os.path import expanduser | ||||
| from random import randint | |||||
| import multiprocessing | |||||
| from tkinter import filedialog | from tkinter import filedialog | ||||
| from tkinter.ttk import Progressbar | from tkinter.ttk import Progressbar | ||||
| from urllib.request import urlopen as uReq | from urllib.request import urlopen as uReq | ||||
| from bs4 import BeautifulSoup as soup | from bs4 import BeautifulSoup as soup | ||||
| import io | import io | ||||
| from mtranslate import translate | from mtranslate import translate | ||||
| import playsound | |||||
| from playsound import playsound | |||||
| import sys | import sys | ||||
| import time | import time | ||||
| from threading import * | from threading import * | ||||
| @@ -16,21 +17,57 @@ import os | |||||
| from tkinter import messagebox | from tkinter import messagebox | ||||
| desktop = expanduser("~/Documents") | desktop = expanduser("~/Documents") | ||||
| agency = "verge" | agency = "verge" | ||||
| from asciimatics.effects import Cycle, Stars | |||||
| from asciimatics.renderers import FigletText | |||||
| from asciimatics.scene import Scene | |||||
| from asciimatics.screen import Screen | |||||
def start_anim():
    """Run the ``demo`` animation through ``Screen.wrapper``.

    Intended as a thread target: it takes no arguments and returns nothing.
    """
    run_in_screen = Screen.wrapper
    run_in_screen(demo)
def sound():
    """Play ``tune.mp3`` on repeat, forever.

    Each ``playsound`` call is made with ``block=True``, so the next pass of
    the loop only starts once the previous playback call has returned.

    The function never returns. Run it in a daemon thread or in a separate
    process that the caller can terminate.
    """
    # Nothing follows the loop: ``while True`` has no exit, so any statement
    # placed after it would never run.
    while True:
        playsound('tune.mp3', block=True)
def demo(screen):
    """Play a two-line Figlet banner over a star field on ``screen``.

    Args:
        screen: the screen object handed in by ``Screen.wrapper``; only its
            ``height`` attribute and ``play`` method are used here.
    """
    # Both banner rows are positioned relative to the vertical midpoint.
    mid_row = screen.height / 2

    upper_banner = Cycle(
        screen,
        FigletText("ASCIIMATICS", font='big'),
        int(mid_row - 8),
    )
    lower_banner = Cycle(
        screen,
        FigletText("ROCKS!", font='big'),
        int(mid_row + 3),
    )
    star_field = Stars(screen, 200)

    banner_scene = Scene([upper_banner, lower_banner, star_field], 500)
    screen.play([banner_scene])
def chooseDirectory():
    """Ask the user for an output directory and remember it on ``program``.

    Opens a directory chooser starting at the current working directory.
    When the user confirms a choice it is stored in ``program.directory``.
    When the dialog is cancelled, the previously stored directory (if any)
    is left untouched.
    """
    chosen = filedialog.askdirectory(
        parent=root,
        initialdir=os.getcwd(),
        title='Please select a directory',
    )
    # A cancelled dialog yields an empty value. Storing it would make the
    # output paths built from it resolve to the filesystem root
    # ("" + "/News.txt"), so only a real selection is accepted.
    if chosen:
        program.directory = chosen
| currentAgency = 0 | |||||
def switchAgencies(agencies):
    """Advance to the next news agency and show its name on ``button4``.

    The module-level ``currentAgency`` counter is incremented and wraps back
    to 0 once it passes 2. The counter then selects the label:
    0 -> "verge", 1 -> "techradar", 2 -> "engadget".

    Args:
        agencies: fallback label, used only if the counter holds a value
            outside 0..2 after the update.
    """
    global currentAgency
    print("called Agencies")

    next_index = currentAgency + 1
    currentAgency = 0 if next_index > 2 else next_index

    labels_by_index = {0: "verge", 1: "techradar", 2: "engadget"}
    if currentAgency in labels_by_index:
        agencies = labels_by_index[currentAgency]
        print("switching to " + agencies)

    button4['text'] = agencies
| class Scrapers(object): | class Scrapers(object): | ||||
| def __init__(self): | def __init__(self): | ||||
| global currentAgency | |||||
| currentAgency = 0 | |||||
| self.thread1 = None | self.thread1 = None | ||||
| self.stop_threads = Event() | self.stop_threads = Event() | ||||
| self.stopped = False | self.stopped = False | ||||
| @@ -52,6 +89,8 @@ class Scrapers(object): | |||||
def skip(self):
    """Raise the ``needToSkip`` flag on this instance.

    The scraping loop checks this flag and abandons the article it is
    currently processing when the flag is set.
    """
    self.needToSkip = True
| def start_thread(self): | def start_thread(self): | ||||
| print("thread started") | |||||
| Skip = Button(topFrame, text = "SKIP!", command = self.skip) | Skip = Button(topFrame, text = "SKIP!", command = self.skip) | ||||
| Skip.pack(side = BOTTOM) | Skip.pack(side = BOTTOM) | ||||
| try: | try: | ||||
| @@ -71,8 +110,11 @@ class Scrapers(object): | |||||
| else: | else: | ||||
| self.thread1 = Thread(target = self.start_now) | self.thread1 = Thread(target = self.start_now) | ||||
| self.thread1.start() | self.thread1.start() | ||||
| p.start() | |||||
| threadActive = 1 | threadActive = 1 | ||||
| def start_now(self): | def start_now(self): | ||||
| print("Getting" + button4['text']) | print("Getting" + button4['text']) | ||||
| if button4['text'] == "techradar": | if button4['text'] == "techradar": | ||||
| progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate') | progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate') | ||||
| @@ -187,6 +229,7 @@ class Scrapers(object): | |||||
| f.write("\n PICTURE URL: " + article_pic) | f.write("\n PICTURE URL: " + article_pic) | ||||
| t.write("\n PICTURE URL: " + article_pic) | t.write("\n PICTURE URL: " + article_pic) | ||||
| if self.stop_threads.is_set(): | if self.stop_threads.is_set(): | ||||
| p.terminate() | |||||
| print("I SURRENDER!") | print("I SURRENDER!") | ||||
| self.stopped = True | self.stopped = True | ||||
| f.close() | f.close() | ||||
| @@ -213,7 +256,7 @@ class Scrapers(object): | |||||
| Labels.config(text = "All Done!") | Labels.config(text = "All Done!") | ||||
| f.close() | f.close() | ||||
| t.close() | t.close() | ||||
| else: | |||||
| elif button4['text'] == "verge": | |||||
| progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate') | progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate') | ||||
| progress['value'] = 0 | progress['value'] = 0 | ||||
| progress.pack(side = TOP) | progress.pack(side = TOP) | ||||
| @@ -308,9 +351,140 @@ class Scrapers(object): | |||||
| t.write("\n" + completeText) | t.write("\n" + completeText) | ||||
| news_picture = news_soup.findAll("picture", {"class":"c-picture"}) | news_picture = news_soup.findAll("picture", {"class":"c-picture"}) | ||||
| Labels.config(text = "Getting image...") | Labels.config(text = "Getting image...") | ||||
| if news_picture[0].img != None: | |||||
| article_pic = news_picture[0].img.get("src") | |||||
| Labels.config(text = "Picture recieved!") | |||||
| if (len(news_picture) > 0): | |||||
| if news_picture[0].img != None: | |||||
| article_pic = news_picture[0].img.get("src") | |||||
| Labels.config(text = "Picture recieved!") | |||||
| else: | |||||
| print("\n THIS ARTICLE HAS NO PICTURE! ") | |||||
| Labels.config(text = "Failed to locate picture :(") | |||||
| local_progress['value'] = 120 | |||||
| f.write("\n PICTURE URL: " + article_pic) | |||||
| t.write("\n PICTURE URL: " + article_pic) | |||||
| if self.stop_threads.is_set(): | |||||
| print("I SURRENDER!") | |||||
| self.stopped = True | |||||
| f.close() | |||||
| t.close() | |||||
| self.CloseLabel.config(text = "you may close now") | |||||
| sys.exit() | |||||
| self.CloseLabel.config(text = "I tried, I failed") | |||||
| break | |||||
| else: | |||||
| print("NOTHING IS STOPPING ME!") | |||||
| Labels.config(text = "Finished the article!") | |||||
| #brand = divWithInfo.div.a.img["title"] | |||||
| #title_container = divWithInfo.find("a", "item-title") | |||||
| #product_name = title_container.text | |||||
| #shipping_container = divWithInfo.find("li", "price-ship") | |||||
| #shipping_cost = shipping_container.text.strip() | |||||
| #print("brand:"+brand) | |||||
| #print("name:"+product_name) | |||||
| #print("shipping:"+shipping_cost) | |||||
| #print("\n") | |||||
| #f.write(brand + "," + product_name.replace(",", "|") + "," + shipping_cost + "\n") | |||||
| Labels.config(text = "All Done!") | |||||
| f.close() | |||||
| t.close() | |||||
| else: | |||||
| progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate') | |||||
| progress['value'] = 0 | |||||
| progress.pack(side = TOP) | |||||
| Labels = Label(topFrame, text = "SCRAPING") | |||||
| Labels.pack(side = TOP) | |||||
| texts = "change" | |||||
| main_url = 'https://www.engadget.com/tomorrow' | |||||
| uClient = uReq(main_url) | |||||
| page_html = uClient.read() | |||||
| uClient.close() | |||||
| page_soup = soup(page_html, "html.parser") | |||||
| containers = page_soup.findAll("article",{"data-component":"PostCard"}) | |||||
| Articles = len(containers) | |||||
| filename = self.directory + "/News.txt" | |||||
| trans_filename = self.directory + "/TranslatedNews.txt" | |||||
| f = io.open(filename, "w", encoding="utf-8") | |||||
| f.write("ACTIVE") | |||||
| t = io.open(trans_filename, "w", encoding ="utf-8") | |||||
| t.write("ACTIVE") | |||||
| Labels.config(text = "setting file!") | |||||
| i = 0 | |||||
| CurrentTitle = Label(topFrame, text = "Preparing...") | |||||
| CurrentTitle.pack(side = TOP) | |||||
| for container in containers: | |||||
| i = i + 1 | |||||
| Labels.config(text = "jumping to URL!") | |||||
| progress['value'] = i * 100 / Articles | |||||
| local_progress = Progressbar(topFrame, orient = HORIZONTAL, length = 120, mode = 'determinate') | |||||
| local_progress['value'] = 0 | |||||
| local_progress.pack(side = BOTTOM) | |||||
| requiredURL = container.div.a["href"] | |||||
| secondary_URL = 'https://www.engadget.com' + requiredURL | |||||
| print("Set target URL!" + secondary_URL) | |||||
| secClient = uReq(secondary_URL) | |||||
| news_html = secClient.read() | |||||
| secClient.close() | |||||
| news_soup = soup(news_html, "html.parser") | |||||
| news_soup.decode('utf-8', 'ignore') | |||||
| news_containers = news_soup.findAll("div", {"data-component":"ArticleHeader"}) | |||||
| if len(news_containers)>0: | |||||
| news_title = news_containers[0].div.h1.text | |||||
| CurrentTitle.config(text = news_title) | |||||
| Labels.config(text = "Extracted Title!") | |||||
| else: | |||||
| print("ERROR! NO TITLE AT "+secondary_URL) | |||||
| Labels.config(text = "Failed to extract title") | |||||
| news_title = "Failed title extraction" | |||||
| news_body = news_soup.findAll("div", {"class":"article-text"}) | |||||
| print("\n TITLE: " + news_title) | |||||
| f.write("\n \n" + news_title + "\n") | |||||
| print("Now translating...") | |||||
| translatedQuery = translate(news_title, "ru", "en") | |||||
| t.write("\n \n" + translatedQuery + "\n") | |||||
| paragraphs = news_body[0].findAll("p") | |||||
| print("Title Recorded!") | |||||
| local_progress['value'] = 10 | |||||
| y = len(paragraphs) | |||||
| x = 0 | |||||
| fullText = "" | |||||
| fullText2 = "" | |||||
| for paragraph in paragraphs: | |||||
| x = x + 1 | |||||
| local_progress['value'] = x * 100 / y + 10 | |||||
| stringx = str(x) | |||||
| Labels.config(text = "Getting paragraph " + stringx + "...") | |||||
| print(paragraph.text + "\n \n \n") | |||||
| if x >= y/2: | |||||
| fullText2 = fullText2 + paragraph.text.strip() | |||||
| else: | |||||
| fullText = fullText + paragraph.text.strip() | |||||
| Labels.config(text = "Written and Translated Paragraph" + stringx + "!") | |||||
| print("Writing Paragraph " + stringx + "...") | |||||
| if self.needToSkip: | |||||
| break | |||||
| if self.needToSkip: | |||||
| self.needToSkip = False | |||||
| continue | |||||
| translatedQuery = translate(fullText, "ru", "en") | |||||
| completeText = translatedQuery | |||||
| translatedQuery = translate(fullText2, "ru", "en") | |||||
| completeText = completeText + translatedQuery | |||||
| f.write("\n" + fullText + fullText2) | |||||
| t.write("\n" + completeText) | |||||
| news_picture = news_soup.findAll("figure", {"data-component":"DefaultLede"}) | |||||
| if len(news_picture) == 0: | |||||
| news_picture = news_soup.findAll("figure") | |||||
| Labels.config(text = "Getting image...") | |||||
| if len(news_picture) > 0: | |||||
| if news_picture[0].img != None: | |||||
| article_pic = news_picture[0].img.get("src") | |||||
| Labels.config(text = "Picture recieved!") | |||||
| else: | |||||
| print("\n THIS ARTICLE HAS NO PICTURE! ") | |||||
| Labels.config(text = "Failed to locate picture :(") | |||||
| else: | else: | ||||
| print("\n THIS ARTICLE HAS NO PICTURE! ") | print("\n THIS ARTICLE HAS NO PICTURE! ") | ||||
| Labels.config(text = "Failed to locate picture :(") | Labels.config(text = "Failed to locate picture :(") | ||||
| @@ -348,6 +522,8 @@ texts = "VERGE SCRAPPER" | |||||
| root = Tk() | root = Tk() | ||||
| program = Scrapers() | program = Scrapers() | ||||
| mainT = Thread(target=program.start_now) | mainT = Thread(target=program.start_now) | ||||
| thread = Thread(target=sound) | |||||
| animthread = Thread(target=start_anim) | |||||
| try: | try: | ||||
| texts | texts | ||||
| except NameError: | except NameError: | ||||
| @@ -359,6 +535,7 @@ else: | |||||
| theLabel.pack() | theLabel.pack() | ||||
| print("FOUND TEXTS!") | print("FOUND TEXTS!") | ||||
| p = multiprocessing.Process(target=playsound, args=("tune.mp3",)) | |||||
| stop_thread = False | stop_thread = False | ||||
| topFrame = Frame(root) | topFrame = Frame(root) | ||||
| topFrame.pack() | topFrame.pack() | ||||
| @@ -372,4 +549,4 @@ button3.pack(side = TOP) | |||||
| button1.pack(side= TOP) | button1.pack(side= TOP) | ||||
| button4.pack(side= TOP) | button4.pack(side= TOP) | ||||
| button2.pack(side = TOP) | button2.pack(side = TOP) | ||||
| root.mainloop() | |||||
| root.mainloop() | |||||