Set up the foundations
Import the libraries we'll use.
import datetime
import time

import openai
import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
Set up the API keys for OpenAI and Discord (the Discord webhook is what delivers the news summaries as messages).
# API key setup
openai.api_key = "Your OpenAI API key"
DISCORD_WEBHOOK_URL = "Your Discord webhook URL"
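Hard-coding secrets in the script is risky; a safer sketch, assuming the keys are exported as OPENAI_API_KEY and DISCORD_WEBHOOK_URL environment variables, reads them with os.environ instead:

import os

# Alternative: read secrets from environment variables instead of hard-coding them
openai.api_key = os.environ["OPENAI_API_KEY"]
DISCORD_WEBHOOK_URL = os.environ["DISCORD_WEBHOOK_URL"]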
Sending Discord messages
def send_message(msg):
    """Send a Discord message via the webhook."""
    now = datetime.datetime.now()
    message = {"content": f"[{now.strftime('%Y-%m-%d %H:%M:%S')}] {str(msg)}"}
    requests.post(DISCORD_WEBHOOK_URL, data=message)
    print(message)
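A quick smoke test, assuming the webhook URL above has been filled in:

send_message("News bot is up and running")  # should appear in the Discord channel with a timestamp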
Chrome crawler setup
# Chrome driver setup
def set_chrome_driver(headless=True):
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    return driver
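To verify the driver starts correctly, a headless fetch of any page works as a sanity check (example.com is used here purely for illustration):

driver = set_chrome_driver(headless=True)
driver.get("https://example.com")
print(driver.title)  # expected: "Example Domain"
driver.quit()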
# Crawl a news article page
def crawl_page(url):
    driver = set_chrome_driver(False)
    try:
        driver.get(url)
        # Adjust this selector if the target site changes its layout
        article_page = driver.find_element(By.CLASS_NAME, 'articlePage')
        text = article_page.text
    except NoSuchElementException:
        text = ""
    finally:
        driver.close()
    return text
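Usage is a single call; the URL below is a placeholder for any investing.com article link, not a real address:

# Placeholder article URL for illustration only
text = crawl_page("https://www.investing.com/news/stock-market-news/example-article")
print(text[:300])  # preview the first 300 characters of the article body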
Prompt GPT
Ask GPT to summarize the crawled news feed.
def summarize(text):
    # Choose the model engine
    model_engine = "text-davinci-003"
    # Maximum number of tokens in the completion
    max_tokens = 1000
    # Prompt (summarize it!)
    prompt = f'''Summarize the paragraph below and interpret whether it is a positive or negative sentiment.
{text}
'''
    # Request the summary
    completion = openai.Completion.create(
        engine=model_engine,
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=0.3,  # creativity
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return completion.choices[0].text
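One caveat: text-davinci-003 accepts roughly 4,097 tokens for prompt and completion combined, so a very long article can overflow the request. A minimal guard, assuming a rough four-characters-per-token heuristic, is to truncate the text before summarizing (truncate_for_prompt is a helper added here for illustration, not part of the original script):

def truncate_for_prompt(text, max_chars=8000):
    # Heuristic guard: ~4 characters per token keeps the prompt near
    # 2,000 tokens, leaving room for the 1,000-token completion.
    return text[:max_chars]

In the wrapper below, the call would then become summarize(truncate_for_prompt(page)).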
# Final wrapper
def summarize_news(url):
    page = crawl_page(url)
    summarized = summarize(page)
    #send_message('[Summary]')
    send_message(summarized)
    return summarized
# Crawl the most popular news
top5 = set_chrome_driver(False)
# Request the URL
top5.get('https://www.investing.com/news/most-popular-news')
# Collect the URLs of the five most popular news feeds
top5_links = []
for link in top5.find_element(By.CLASS_NAME, 'largeTitle').find_elements(By.CLASS_NAME, 'js-article-item')[:5]:
    top5_links.append(link.find_element(By.CSS_SELECTOR, 'a').get_attribute('href'))
top5.close()
# Summarize each article and send the result to Discord
top5_summarize = []
for link in top5_links:
    output = summarize_news(link)
    top5_summarize.append(output)
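Since the whole flow is just crawl, summarize, and send, it can be re-run on a schedule. Below is a hypothetical hourly polling loop; the run_pipeline wrapper and the one-hour interval are illustrative choices, not part of the original script, and it puts the otherwise unused time import to work:

# Hypothetical scheduler: repeat the whole pipeline once per hour
def run_pipeline():
    driver = set_chrome_driver(False)
    driver.get('https://www.investing.com/news/most-popular-news')
    items = driver.find_element(By.CLASS_NAME, 'largeTitle').find_elements(By.CLASS_NAME, 'js-article-item')[:5]
    links = [item.find_element(By.CSS_SELECTOR, 'a').get_attribute('href') for item in items]
    driver.close()
    for link in links:
        summarize_news(link)

while True:
    run_pipeline()
    time.sleep(60 * 60)  # wait one hour before the next round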