Madiwka3 преди 3 години
родител
ревизия
2e27e86f84
променени са 3 файла, в които са добавени 186 реда и са изтрити 9 реда
  1. +186
    -9
      GOOGLE.pyw
  2. +0
    -0
      dependencies.sh
  3. Двоични данни
      tune.mp3

+ 186
- 9
GOOGLE.pyw Целия файл

@@ -1,14 +1,15 @@
import tkinter
from tkinter import *
from os.path import expanduser
from random import randint
import multiprocessing
from tkinter import filedialog
from tkinter.ttk import Progressbar
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import io
from mtranslate import translate
import playsound
from playsound import playsound
import sys
import time
from threading import *
@@ -16,21 +17,57 @@ import os
from tkinter import messagebox
desktop = expanduser("~/Documents")
agency = "verge"
from asciimatics.effects import Cycle, Stars
from asciimatics.renderers import FigletText
from asciimatics.scene import Scene
from asciimatics.screen import Screen
def start_anim():
Screen.wrapper(demo)
def sound():
while True:
playsound('tune.mp3', block=True)
playsound(None)
def demo(screen):
effects = [
Cycle(
screen,
FigletText("ASCIIMATICS", font='big'),
int(screen.height / 2 - 8)),
Cycle(
screen,
FigletText("ROCKS!", font='big'),
int(screen.height / 2 + 3)),
Stars(screen, 200)
]
screen.play([Scene(effects, 500)])
def chooseDirectory():
currdir = os.getcwd()
tempdir = filedialog.askdirectory(parent=root, initialdir=currdir, title='Please select a directory')
program.directory = tempdir
currentAgency = 0
def switchAgencies(agencies):
print("called Agencies")
if agencies == "verge":
global currentAgency
currentAgency = currentAgency + 1
if currentAgency > 2:
currentAgency = 0
if currentAgency == 1:
print("switching to techradar")
agencies = "techradar"
else:
elif currentAgency == 2:
print("switching to engadget")
agencies = "engadget"
elif currentAgency == 0:
print("switching to verge")
agencies = "verge"
button4['text'] = agencies
class Scrapers(object):
def __init__(self):
global currentAgency
currentAgency = 0
self.thread1 = None
self.stop_threads = Event()
self.stopped = False
@@ -52,6 +89,8 @@ class Scrapers(object):
def skip(self):
self.needToSkip = True
def start_thread(self):
print("thread started")
Skip = Button(topFrame, text = "SKIP!", command = self.skip)
Skip.pack(side = BOTTOM)
try:
@@ -71,8 +110,11 @@ class Scrapers(object):
else:
self.thread1 = Thread(target = self.start_now)
self.thread1.start()
p.start()
threadActive = 1
def start_now(self):
print("Getting" + button4['text'])
if button4['text'] == "techradar":
progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate')
@@ -187,6 +229,7 @@ class Scrapers(object):
f.write("\n PICTURE URL: " + article_pic)
t.write("\n PICTURE URL: " + article_pic)
if self.stop_threads.is_set():
p.terminate()
print("I SURRENDER!")
self.stopped = True
f.close()
@@ -213,7 +256,7 @@ class Scrapers(object):
Labels.config(text = "All Done!")
f.close()
t.close()
else:
elif button4['text'] == "verge":
progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate')
progress['value'] = 0
progress.pack(side = TOP)
@@ -308,9 +351,140 @@ class Scrapers(object):
t.write("\n" + completeText)
news_picture = news_soup.findAll("picture", {"class":"c-picture"})
Labels.config(text = "Getting image...")
if news_picture[0].img != None:
article_pic = news_picture[0].img.get("src")
Labels.config(text = "Picture recieved!")
if (len(news_picture) > 0):
if news_picture[0].img != None:
article_pic = news_picture[0].img.get("src")
Labels.config(text = "Picture recieved!")
else:
print("\n THIS ARTICLE HAS NO PICTURE! ")
Labels.config(text = "Failed to locate picture :(")
local_progress['value'] = 120
f.write("\n PICTURE URL: " + article_pic)
t.write("\n PICTURE URL: " + article_pic)
if self.stop_threads.is_set():
print("I SURRENDER!")
self.stopped = True
f.close()
t.close()
self.CloseLabel.config(text = "you may close now")
sys.exit()
self.CloseLabel.config(text = "I tried, I failed")
break
else:
print("NOTHING IS STOPPING ME!")
Labels.config(text = "Finished the article!")
#brand = divWithInfo.div.a.img["title"]
#title_container = divWithInfo.find("a", "item-title")
#product_name = title_container.text
#shipping_container = divWithInfo.find("li", "price-ship")
#shipping_cost = shipping_container.text.strip()
#print("brand:"+brand)
#print("name:"+product_name)
#print("shipping:"+shipping_cost)
#print("\n")
#f.write(brand + "," + product_name.replace(",", "|") + "," + shipping_cost + "\n")
Labels.config(text = "All Done!")
f.close()
t.close()
else:
progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate')
progress['value'] = 0
progress.pack(side = TOP)
Labels = Label(topFrame, text = "SCRAPING")
Labels.pack(side = TOP)
texts = "change"
main_url = 'https://www.engadget.com/tomorrow'
uClient = uReq(main_url)
page_html = uClient.read()
uClient.close()
page_soup = soup(page_html, "html.parser")
containers = page_soup.findAll("article",{"data-component":"PostCard"})
Articles = len(containers)
filename = self.directory + "/News.txt"
trans_filename = self.directory + "/TranslatedNews.txt"
f = io.open(filename, "w", encoding="utf-8")
f.write("ACTIVE")
t = io.open(trans_filename, "w", encoding ="utf-8")
t.write("ACTIVE")
Labels.config(text = "setting file!")
i = 0
CurrentTitle = Label(topFrame, text = "Preparing...")
CurrentTitle.pack(side = TOP)
for container in containers:
i = i + 1
Labels.config(text = "jumping to URL!")
progress['value'] = i * 100 / Articles
local_progress = Progressbar(topFrame, orient = HORIZONTAL, length = 120, mode = 'determinate')
local_progress['value'] = 0
local_progress.pack(side = BOTTOM)
requiredURL = container.div.a["href"]
secondary_URL = 'https://www.engadget.com' + requiredURL
print("Set target URL!" + secondary_URL)
secClient = uReq(secondary_URL)
news_html = secClient.read()
secClient.close()
news_soup = soup(news_html, "html.parser")
news_soup.decode('utf-8', 'ignore')
news_containers = news_soup.findAll("div", {"data-component":"ArticleHeader"})
if len(news_containers)>0:
news_title = news_containers[0].div.h1.text
CurrentTitle.config(text = news_title)
Labels.config(text = "Extracted Title!")
else:
print("ERROR! NO TITLE AT "+secondary_URL)
Labels.config(text = "Failed to extract title")
news_title = "Failed title extraction"
news_body = news_soup.findAll("div", {"class":"article-text"})
print("\n TITLE: " + news_title)
f.write("\n \n" + news_title + "\n")
print("Now translating...")
translatedQuery = translate(news_title, "ru", "en")
t.write("\n \n" + translatedQuery + "\n")
paragraphs = news_body[0].findAll("p")
print("Title Recorded!")
local_progress['value'] = 10
y = len(paragraphs)
x = 0
fullText = ""
fullText2 = ""
for paragraph in paragraphs:
x = x + 1
local_progress['value'] = x * 100 / y + 10
stringx = str(x)
Labels.config(text = "Getting paragraph " + stringx + "...")
print(paragraph.text + "\n \n \n")
if x >= y/2:
fullText2 = fullText2 + paragraph.text.strip()
else:
fullText = fullText + paragraph.text.strip()
Labels.config(text = "Written and Translated Paragraph" + stringx + "!")
print("Writing Paragraph " + stringx + "...")
if self.needToSkip:
break
if self.needToSkip:
self.needToSkip = False
continue
translatedQuery = translate(fullText, "ru", "en")
completeText = translatedQuery
translatedQuery = translate(fullText2, "ru", "en")
completeText = completeText + translatedQuery
f.write("\n" + fullText + fullText2)
t.write("\n" + completeText)
news_picture = news_soup.findAll("figure", {"data-component":"DefaultLede"})
if len(news_picture) == 0:
news_picture = news_soup.findAll("figure")
Labels.config(text = "Getting image...")
if len(news_picture) > 0:
if news_picture[0].img != None:
article_pic = news_picture[0].img.get("src")
Labels.config(text = "Picture recieved!")
else:
print("\n THIS ARTICLE HAS NO PICTURE! ")
Labels.config(text = "Failed to locate picture :(")
else:
print("\n THIS ARTICLE HAS NO PICTURE! ")
Labels.config(text = "Failed to locate picture :(")
@@ -348,6 +522,8 @@ texts = "VERGE SCRAPPER"
root = Tk()
program = Scrapers()
mainT = Thread(target=program.start_now)
thread = Thread(target=sound)
animthread = Thread(target=start_anim)
try:
texts
except NameError:
@@ -359,6 +535,7 @@ else:
theLabel.pack()
print("FOUND TEXTS!")
p = multiprocessing.Process(target=playsound, args=("tune.mp3",))
stop_thread = False
topFrame = Frame(root)
topFrame.pack()
@@ -372,4 +549,4 @@ button3.pack(side = TOP)
button1.pack(side= TOP)
button4.pack(side= TOP)
button2.pack(side = TOP)
root.mainloop()
root.mainloop()

+ 0
- 0
dependencies.sh Целия файл


Двоични данни
tune.mp3 Целия файл


Зареждане…
Отказ
Запис