瀏覽代碼

added thing

master
Madiwka3 3 年之前
父節點
當前提交
2e27e86f84
共有 3 個文件被更改,包括 186 次插入9 次删除
  1. +186
    -9
      GOOGLE.pyw
  2. +0
    -0
      dependencies.sh
  3. 二進制
      tune.mp3

+ 186
- 9
GOOGLE.pyw 查看文件

@@ -1,14 +1,15 @@
import tkinter
from tkinter import *
from os.path import expanduser
from random import randint
import multiprocessing
from tkinter import filedialog
from tkinter.ttk import Progressbar
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import io
from mtranslate import translate
import playsound
from playsound import playsound
import sys
import time
from threading import *
@@ -16,21 +17,57 @@ import os
from tkinter import messagebox
desktop = expanduser("~/Documents")
agency = "verge"
from asciimatics.effects import Cycle, Stars
from asciimatics.renderers import FigletText
from asciimatics.scene import Scene
from asciimatics.screen import Screen
def start_anim():
    """Run the ``demo`` animation through ``Screen.wrapper``.

    ``demo`` expects a single ``screen`` argument; ``Screen.wrapper`` is the
    asciimatics entry point that is handed the callback here.
    """
    Screen.wrapper(demo)
def sound():
    """Play ``tune.mp3`` back-to-back for as long as the caller keeps running.

    Each ``playsound`` call is made with ``block=True``, so the loop only
    starts the clip again after the previous playback has returned.

    The former trailing ``playsound(None)`` call was removed: placed after the
    infinite loop it could never execute, and placed inside the loop it would
    hand ``playsound`` a non-path value right after the first playback.
    """
    while True:
        playsound('tune.mp3', block=True)
def demo(screen):
    """Play the banner animation on ``screen``.

    Builds two ``Cycle`` effects that render ``FigletText`` banners
    ("ASCIIMATICS" above the vertical middle, "ROCKS!" below it) plus a
    ``Stars`` effect, then plays them together as a single ``Scene``.

    Args:
        screen: the screen object supplied by ``Screen.wrapper``; only its
            ``height`` attribute and ``play`` method are used here.
    """
    upper_banner = Cycle(
        screen,
        FigletText("ASCIIMATICS", font='big'),
        int(screen.height / 2 - 8),
    )
    lower_banner = Cycle(
        screen,
        FigletText("ROCKS!", font='big'),
        int(screen.height / 2 + 3),
    )
    star_field = Stars(screen, 200)

    # 500 is Scene's second positional argument (its duration in the
    # asciimatics API -- confirm units against the library docs).
    scene = Scene([upper_banner, lower_banner, star_field], 500)
    screen.play([scene])
def chooseDirectory():
    """Ask the user for an output folder and remember it on ``program``.

    Opens a directory chooser parented to ``root`` that starts in the current
    working directory. The selection is stored in ``program.directory``, which
    the scraper later uses as the prefix for ``/News.txt`` and
    ``/TranslatedNews.txt``.

    If the dialog is dismissed without a selection the returned value is empty;
    in that case ``program.directory`` is left untouched, so an earlier choice
    is not overwritten by an empty path (which would make the output files
    resolve to the filesystem root).
    """
    chosen = filedialog.askdirectory(
        parent=root,
        initialdir=os.getcwd(),
        title='Please select a directory',
    )
    # Cancel returns an empty string (an empty tuple on some Tk builds);
    # both are falsy, so a plain truth test covers either.
    if chosen:
        program.directory = chosen
currentAgency = 0
def switchAgencies(agencies):
    """Advance to the next news source and show its name on ``button4``.

    The module-level counter ``currentAgency`` is stepped through 0 -> 1 -> 2
    and back to 0; the matching name (0 = "verge", 1 = "techradar",
    2 = "engadget") is printed and written to ``button4['text']``, which is
    the value the scraper reads to decide which site to fetch.

    Args:
        agencies: kept for interface compatibility. The incoming value does
            not influence the result; the name is only reused as a local for
            the newly selected source.

    Note:
        This replaces a chain that still contained the pre-change
        ``if agencies == "verge":`` line and an ``else:`` directly followed by
        ``elif``, which is not valid Python.
    """
    global currentAgency

    # Index in this tuple is the counter value for that source.
    rotation = ("verge", "techradar", "engadget")

    print("called Agencies")
    currentAgency = currentAgency + 1
    if currentAgency > 2:
        currentAgency = 0

    agencies = rotation[currentAgency]
    print("switching to " + agencies)
    button4['text'] = agencies
class Scrapers(object):
def __init__(self):
global currentAgency
currentAgency = 0
self.thread1 = None
self.stop_threads = Event()
self.stopped = False
@@ -52,6 +89,8 @@ class Scrapers(object):
def skip(self):
    """Request that the article currently being processed be skipped.

    Only raises the ``needToSkip`` flag; the scraping loop checks it, breaks
    out of the paragraph loop, resets the flag and continues with the next
    article.
    """
    self.needToSkip = True
def start_thread(self):
print("thread started")
Skip = Button(topFrame, text = "SKIP!", command = self.skip)
Skip.pack(side = BOTTOM)
try:
@@ -71,8 +110,11 @@ class Scrapers(object):
else:
self.thread1 = Thread(target = self.start_now)
self.thread1.start()
p.start()
threadActive = 1
def start_now(self):
print("Getting" + button4['text'])
if button4['text'] == "techradar":
progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate')
@@ -187,6 +229,7 @@ class Scrapers(object):
f.write("\n PICTURE URL: " + article_pic)
t.write("\n PICTURE URL: " + article_pic)
if self.stop_threads.is_set():
p.terminate()
print("I SURRENDER!")
self.stopped = True
f.close()
@@ -213,7 +256,7 @@ class Scrapers(object):
Labels.config(text = "All Done!")
f.close()
t.close()
else:
elif button4['text'] == "verge":
progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate')
progress['value'] = 0
progress.pack(side = TOP)
@@ -308,9 +351,140 @@ class Scrapers(object):
t.write("\n" + completeText)
news_picture = news_soup.findAll("picture", {"class":"c-picture"})
Labels.config(text = "Getting image...")
if news_picture[0].img != None:
article_pic = news_picture[0].img.get("src")
Labels.config(text = "Picture recieved!")
if (len(news_picture) > 0):
if news_picture[0].img != None:
article_pic = news_picture[0].img.get("src")
Labels.config(text = "Picture recieved!")
else:
print("\n THIS ARTICLE HAS NO PICTURE! ")
Labels.config(text = "Failed to locate picture :(")
local_progress['value'] = 120
f.write("\n PICTURE URL: " + article_pic)
t.write("\n PICTURE URL: " + article_pic)
if self.stop_threads.is_set():
print("I SURRENDER!")
self.stopped = True
f.close()
t.close()
self.CloseLabel.config(text = "you may close now")
sys.exit()
self.CloseLabel.config(text = "I tried, I failed")
break
else:
print("NOTHING IS STOPPING ME!")
Labels.config(text = "Finished the article!")
#brand = divWithInfo.div.a.img["title"]
#title_container = divWithInfo.find("a", "item-title")
#product_name = title_container.text
#shipping_container = divWithInfo.find("li", "price-ship")
#shipping_cost = shipping_container.text.strip()
#print("brand:"+brand)
#print("name:"+product_name)
#print("shipping:"+shipping_cost)
#print("\n")
#f.write(brand + "," + product_name.replace(",", "|") + "," + shipping_cost + "\n")
Labels.config(text = "All Done!")
f.close()
t.close()
else:
progress = Progressbar(topFrame, orient = HORIZONTAL, length = 100, mode = 'determinate')
progress['value'] = 0
progress.pack(side = TOP)
Labels = Label(topFrame, text = "SCRAPING")
Labels.pack(side = TOP)
texts = "change"
main_url = 'https://www.engadget.com/tomorrow'
uClient = uReq(main_url)
page_html = uClient.read()
uClient.close()
page_soup = soup(page_html, "html.parser")
containers = page_soup.findAll("article",{"data-component":"PostCard"})
Articles = len(containers)
filename = self.directory + "/News.txt"
trans_filename = self.directory + "/TranslatedNews.txt"
f = io.open(filename, "w", encoding="utf-8")
f.write("ACTIVE")
t = io.open(trans_filename, "w", encoding ="utf-8")
t.write("ACTIVE")
Labels.config(text = "setting file!")
i = 0
CurrentTitle = Label(topFrame, text = "Preparing...")
CurrentTitle.pack(side = TOP)
for container in containers:
i = i + 1
Labels.config(text = "jumping to URL!")
progress['value'] = i * 100 / Articles
local_progress = Progressbar(topFrame, orient = HORIZONTAL, length = 120, mode = 'determinate')
local_progress['value'] = 0
local_progress.pack(side = BOTTOM)
requiredURL = container.div.a["href"]
secondary_URL = 'https://www.engadget.com' + requiredURL
print("Set target URL!" + secondary_URL)
secClient = uReq(secondary_URL)
news_html = secClient.read()
secClient.close()
news_soup = soup(news_html, "html.parser")
news_soup.decode('utf-8', 'ignore')
news_containers = news_soup.findAll("div", {"data-component":"ArticleHeader"})
if len(news_containers)>0:
news_title = news_containers[0].div.h1.text
CurrentTitle.config(text = news_title)
Labels.config(text = "Extracted Title!")
else:
print("ERROR! NO TITLE AT "+secondary_URL)
Labels.config(text = "Failed to extract title")
news_title = "Failed title extraction"
news_body = news_soup.findAll("div", {"class":"article-text"})
print("\n TITLE: " + news_title)
f.write("\n \n" + news_title + "\n")
print("Now translating...")
translatedQuery = translate(news_title, "ru", "en")
t.write("\n \n" + translatedQuery + "\n")
paragraphs = news_body[0].findAll("p")
print("Title Recorded!")
local_progress['value'] = 10
y = len(paragraphs)
x = 0
fullText = ""
fullText2 = ""
for paragraph in paragraphs:
x = x + 1
local_progress['value'] = x * 100 / y + 10
stringx = str(x)
Labels.config(text = "Getting paragraph " + stringx + "...")
print(paragraph.text + "\n \n \n")
if x >= y/2:
fullText2 = fullText2 + paragraph.text.strip()
else:
fullText = fullText + paragraph.text.strip()
Labels.config(text = "Written and Translated Paragraph" + stringx + "!")
print("Writing Paragraph " + stringx + "...")
if self.needToSkip:
break
if self.needToSkip:
self.needToSkip = False
continue
translatedQuery = translate(fullText, "ru", "en")
completeText = translatedQuery
translatedQuery = translate(fullText2, "ru", "en")
completeText = completeText + translatedQuery
f.write("\n" + fullText + fullText2)
t.write("\n" + completeText)
news_picture = news_soup.findAll("figure", {"data-component":"DefaultLede"})
if len(news_picture) == 0:
news_picture = news_soup.findAll("figure")
Labels.config(text = "Getting image...")
if len(news_picture) > 0:
if news_picture[0].img != None:
article_pic = news_picture[0].img.get("src")
Labels.config(text = "Picture recieved!")
else:
print("\n THIS ARTICLE HAS NO PICTURE! ")
Labels.config(text = "Failed to locate picture :(")
else:
print("\n THIS ARTICLE HAS NO PICTURE! ")
Labels.config(text = "Failed to locate picture :(")
@@ -348,6 +522,8 @@ texts = "VERGE SCRAPPER"
# Create the Tk root window and the single Scrapers instance; chooseDirectory
# uses `root` as the dialog parent and stores its result on `program`.
root = Tk()
program = Scrapers()
# These Thread objects are only constructed here; nothing in this span starts
# them. Targets: the scraper body, the looping tune, and the ASCII animation.
mainT = Thread(target=program.start_now)
thread = Thread(target=sound)
animthread = Thread(target=start_anim)
try:
texts
except NameError:
@@ -359,6 +535,7 @@ else:
theLabel.pack()
print("FOUND TEXTS!")
# Separate process that plays tune.mp3 via playsound. Scrapers.start_thread
# calls p.start(), and the scraping loop calls p.terminate() when a stop has
# been requested.
# NOTE(review): a multiprocessing.Process object can be started only once, and
# with the "spawn" start method the child re-imports this module -- confirm the
# start button cannot fire twice and that the script is guarded appropriately.
p = multiprocessing.Process(target=playsound, args=("tune.mp3",))
stop_thread = False
# Frame used as the parent for the widgets created during scraping
# (progress bars, status labels, the SKIP button).
topFrame = Frame(root)
topFrame.pack()
@@ -372,4 +549,4 @@ button3.pack(side = TOP)
# Stack the remaining buttons under the ones packed earlier.
button1.pack(side= TOP)
button4.pack(side= TOP)
button2.pack(side = TOP)
# Enter the Tk event loop exactly once. The duplicated call that followed was
# the old and new copy of the same last line; a second mainloop() would
# re-enter the loop if the first one ever returned via quit().
root.mainloop()

+ 0
- 0
dependencies.sh 查看文件


二進制
tune.mp3 查看文件


Loading…
取消
儲存