
Set up the foundations

Import the required libraries.

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import openai
import time
from selenium.common.exceptions import NoSuchElementException
import requests
import datetime

Set up the API keys for OpenAI and Discord (the Discord webhook lets us receive the news feed as Discord messages).

# API key setup
openai.api_key = "Your OpenAI API key"

DISCORD_WEBHOOK_URL = "Your Discord webhook URL"
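
Hardcoding secrets in the script works for a quick test, but a safer sketch is to read them from environment variables instead. The variable names OPENAI_API_KEY and DISCORD_WEBHOOK_URL below are my choice, not from the original setup.

import os

# Assumes you exported these in your shell beforehand (names are hypothetical)
openai.api_key = os.environ.get("OPENAI_API_KEY")
DISCORD_WEBHOOK_URL = os.environ.get("DISCORD_WEBHOOK_URL")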

Sending Discord messages

def send_message(msg):
    """Send Discord Message"""
    now = datetime.datetime.now()
    message = {"content": f"[{now.strftime('%Y-%m-%d %H:%M:%S')}] {str(msg)}"}
    requests.post(DISCORD_WEBHOOK_URL, data=message)
    print(message)
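
With a valid webhook URL configured, a quick sanity check might look like this (the message text is just an example):

send_message("Webhook test")
# Console output (timestamp will differ):
# {'content': '[2024-01-01 12:00:00] Webhook test'}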

Chrome crawling setup

# Chrome driver setup
def set_chrome_driver(headless=True):
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36")
    # Selenium 4 expects the driver path to be wrapped in a Service object
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    return driver

# Crawl a news article page
def crawl_page(url):
    driver = set_chrome_driver(False)
    try:
        driver.get(url)
        # The 'articlePage' class is site-specific and may change; adjust the selector if the layout does
        article_page = driver.find_element(By.CLASS_NAME, 'articlePage')
        text = article_page.text
    except NoSuchElementException:
        text = ""
    finally:
        driver.quit()
    return text
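
To try the crawler on its own before wiring everything together, pass it any article link (the URL below is a hypothetical placeholder):

article_url = 'https://www.investing.com/news/stock-market-news'  # hypothetical example link
body = crawl_page(article_url)
print(body[:300] if body else 'No article body found')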

Prompt GPT

Ask GPT to summarize each news feed and classify its sentiment.

def summarize(text):
    # Choose the model engine
    model_engine = "text-davinci-003"

    # Maximum tokens in the response
    max_tokens = 1000

    # Prompt (summarize and classify sentiment)
    prompt = f'''Summarize the paragraph below and interpret whether it is a positive or negative sentiment.

    {text}
    '''

    # Request summarizing
    completion = openai.Completion.create(
        engine=model_engine,
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=0.3,       # low temperature keeps the summary focused
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return completion.choices[0].text
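
A quick test with a throwaway paragraph (the sample text is made up, and a valid OpenAI key is required):

sample = "Shares of the company rose 5% after it beat quarterly earnings estimates."
print(summarize(sample))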

# Final wrapper
def summarize_news(url):
    page = crawl_page(url)
    summarized = summarize(page)
    send_message(summarized)
    return summarized  # return the summary so callers can collect it
  
# Crawl the most-popular-news page
top5 = set_chrome_driver(False)
# Request the URL
top5.get('https://www.investing.com/news/most-popular-news')

# Collect the links of the top 5 news feeds
top5_links = []

for link in top5.find_element(By.CLASS_NAME, 'largeTitle').find_elements(By.CLASS_NAME, 'js-article-item')[:5]:
    top5_links.append(link.find_element(By.CSS_SELECTOR, 'a').get_attribute('href'))

top5.quit()
    
top5_summarize = []

for link in top5_links:
    output = summarize_news(link)
    top5_summarize.append(output)
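
The time import above goes unused so far; one way to put it to work is a simple polling loop that re-runs the whole pipeline periodically. This is only a sketch: the run_once wrapper and the one-hour interval are my additions, not part of the original post.

def run_once():
    driver = set_chrome_driver(False)
    driver.get('https://www.investing.com/news/most-popular-news')
    items = driver.find_element(By.CLASS_NAME, 'largeTitle').find_elements(By.CLASS_NAME, 'js-article-item')[:5]
    links = [item.find_element(By.CSS_SELECTOR, 'a').get_attribute('href') for item in items]
    driver.quit()
    for link in links:
        summarize_news(link)

while True:
    run_once()
    time.sleep(3600)  # hypothetical interval: re-run once an hour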
