import docx
import requests
from openai import OpenAI
import os
import openai
from Levenshtein import ratio
from docx.shared import Pt
from bs4 import BeautifulSoup
import io
from docx.oxml.shared import OxmlElement, qn
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
WEBZ_API_KEY = os.getenv(“WEBZ_API_KEY”)
openai.api_key = os.getenv(“OPENAI_API_KEY”)
NUM_OF_REPORTS = 5
client = OpenAI()
def are_similar(str1, str2, threshold=1):
“””
Check if two strings are similar based on Levenshtein ratio.
“””
return ratio(str1, str2) > threshold
def remove_similar_strings(articles):
unique_articles = []
for article in articles:
if not any(are_similar(article[‘text’], existing[‘text’], 0.7) for existing in unique_articles):
unique_articles.append(article)
return unique_articles
def trim_string(string, max_length):
if len(string) > max_length:
return string[:max_length]
else:
return string
# Function to get news articles from Webz.io API
def fetch_articles(query, api_key, total):
endpoint = f“https://api.webz.io/filterWebContent?token={api_key}&format=json&q={query}&size=100&ts=0”
all_posts = []
while total > 0:
response = requests.get(endpoint)
data = response.json()
posts = data[“posts”]
if len(posts) == 0:
break
all_posts.extend(posts)
total -= len(posts)
if total > 0 and “next” in data:
endpoint = f“https://api.webz.io{data[‘next’]}”
else:
break
articles = []
for article in all_posts:
article = {‘title’: article[“title”],
‘text’: trim_string(trim_title(article[“title”]) + “\n\n” + article[“text”], 10000),
‘link’: article[‘url’],
‘published’: article[‘published’]}
articles.append(article)
return articles
def trim_title(input_string):
words = input_string.split()
if “|” in input_string:
return input_string.split(“|”)[0]
last_dash_index = input_string.rfind(“-“)
if last_dash_index != –1:
right_of_dash = input_string[last_dash_index + 1:]
right_words = right_of_dash.split()
if len(right_words) <= 3 and len(words) > 10:
return input_string[:last_dash_index]
return input_string
def add_image_from_base64(doc, image_url):
response = requests.get(image_url)
# Check if the request was successful
if response.status_code == 200:
image_stream = io.BytesIO(response.content)
doc.add_picture(image_stream, width=docx.shared.Inches(6))
else:
print(f“Failed to download image. Status code: {response.status_code}”)
def html_to_word(doc, html_content):
soup = BeautifulSoup(html_content, ‘html.parser’)
for element in soup.find_all([‘b’, ‘ul’]):
if element.name == ‘b’:
# Add bold text as a heading
doc.add_paragraph(element.get_text(), style=‘Heading 2’)
elif element.name == ‘ul’:
for item in element.find_all(‘li’):
# Add list items
doc.add_paragraph(item.get_text(), style=‘List Bullet’)
def add_hyperlink(paragraph, url, text):
“””
Add a hyperlink to a paragraph.
“””
part = paragraph.part
r_id = part.relate_to(url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True)
hyperlink = OxmlElement(‘w:hyperlink’)
hyperlink.set(qn(‘r:id’), r_id, )
new_run = OxmlElement(‘w:r’)
rPr = OxmlElement(‘w:rPr’)
u = OxmlElement(‘w:u’)
u.set(qn(‘w:val’), ‘single’)
rPr.append(u)
u = OxmlElement(‘w:u’)
u.set(qn(‘w:val’), ‘single’)
rPr.append(u)
new_run.append(rPr)
new_run.text = text
hyperlink.append(new_run)
paragraph._p.append(hyperlink)
return hyperlink
def insert_titles_in_text(text, reports):
# Placeholder for inserting the titles
placeholder = “[]”
# Extracting the titles from the reports and formatting them with new lines
titles = “\n”.join([report[‘title’] for report in reports])
# Replacing the placeholder with the titles
updated_text = text.replace(placeholder, titles)
return updated_text
def generate_article_image():
print(“Generating post image”)
image_url = “”
try:
response = client.images.generate(
model=“dall-e-3”,
prompt=“Create an image for the cover of a supply chain risk report digest. The image should feature a stylized, semi-abstract illustration of a global supply chain network. Imagine a globe in the center, with lines and nodes representing international trade routes and supply nodes. Overlay this with subtle, translucent icons symbolizing various risks such as a storm cloud for natural disasters, a broken chain link for disruptions, and a padlock for cybersecurity threats. The color scheme should be professional and muted, with blues, grays, and occasional red accents to highlight risks. The overall tone should be sophisticated and informative, suitable for a corporate report. The image should be in a landscape orientation.”,
n=1,
size=“1024×1024”
)
image_url = response.data[0].url
except Exception as e:
print(“An error occurred generating the image:”, str(e))
return image_url
def get_unique_posts_from_webz(query):
print(“Fetch posts from Webz.io”)
articles = fetch_articles(query, WEBZ_API_KEY, 100)
filtered_articles = remove_similar_strings(articles)
return filtered_articles
def call_gpt_completion(prompt):
return client.chat.completions.create(
model=“gpt-4-1106-preview”,
max_tokens=4096,
messages=[
{“role”: “user”, “content”: prompt},
]
)
def generate_reports(filtered_articles):
print(“Generating Reports”)
reports = []
for article in filtered_articles:
print(f“Creating report about: {article[‘title’]}”)
prompt = f“””Carefully review the following news article between the [] brackets and determine if there is an explicit discussion about a supply chain risk from its content. The article is as follows:
[
{article[‘text’]}
]
If the article explicitly mentions or clearly discusses a supply chain risk, generate a detailed report in HTML format. Use <B> tags to highlight the titles of each section and <UL> and <LI> tags for listing items. The report should include the following sections:
<HTML>
<B> Summary of the Incident </B>
<UL>
<LI> Briefly describe the specific supply chain disruption or risk highlighted in the article. This may include details such as the nature of the risk, the affected industries or companies, and the geographical locations involved.</LI>
</UL>
<B> Background Information </B>
<UL>
<LI> Provide context about the involved parties (e.g., companies, countries) and the specific supply chain elements at risk (e.g., raw materials, manufacturing, logistics). This section should also cover any relevant history or prior incidents that relate to the current risk. </LI>
</UL>
<B> Risk Analysis </B>
<UL>
<LI> Nature of the Risk: Define whether it is a logistical, geopolitical, environmental, technological, or market-related risk </LI>
<LI> Impact Assessment: Evaluate the potential impact on various stakeholders, such as businesses, consumers, and economies. Consider short-term and long-term effects. </LI>
<LI> Probability and Severity: Estimate the likelihood of the risk materializing and its potential severity. </LI>
</UL>
<B> Current Responses and Strategies </B>
<UL>
<LI> Measures Taken: Outline any current responses or mitigation strategies employed by affected companies or governments. </LI>
<LI> Effectiveness: Assess the effectiveness of these strategies and whether they address the root causes of the risk. </LI>
</UL>
<B> Future Implications </B>
<UL>
<LI> Forecasting: Predict potential future developments and implications of the risk. </LI>
<LI> Long-term Strategies: Suggest long-term strategies for companies and governments to mitigate similar risks in the future. </LI>
</UL>
<B> Regulatory Concerns </B>
<UL>
<LI> Mention any regulatory approvals and antitrust concerns, legal implications and compliance requirements.</LI>
</UL>
<B> Broader Industry and Economic Context </B>
<UL>
<LI> Discuss how this specific risk fits into wider industry trends and economic conditions.</LI>
</UL>
<B> Similar Risks </B>
<UL>
<LI> Analyze the potential for similar risks in other sectors or regions.</LI>
</UL>
<B> Recommendations </B>
<UL>
<LI> Provide actionable recommendations for stakeholders to better prepare for, mitigate, or respond to such risks. </LI>
<LI> Highlight any opportunities for improvement in supply chain management or policy changes. </LI>
</UL>
</HTML>
If the article does not explicitly mention or discuss a supply chain risk, please respond with: can’t produce report.
“””
try:
response = call_gpt_completion(prompt)
report = {‘text’: ”}
for choice in response.choices:
report[‘text’] += choice.message.content
if “Summary of the Incident” in report[‘text’]:
report[‘link’] = article[‘link’]
report[‘title’] = article[‘title’]
report[‘published’] = article[‘published’]
reports.append(report)
print(f“Created a report about: {article[‘title’]}”)
else:
print(f“Can’t product report for: {article[‘title’]}”)
if len(reports) == NUM_OF_REPORTS:
break
except Exception as e:
print(“An error occurred:”, str(e))
return reports
def generate_intro(reports):
print(“Generate post intro”)
prompt = “””
Write a paragraph introducing a digest that contains supply-chain risk reports about the following titles, don’t elaborate on these titles:
[]
The reports are created automatically by using Webz.io news api and ChatGPT. The reports are generated by calling the Webz.io news API for news articles about supply chain issues. The matching news articles are then run through a ChatGPT prompt to analyze if the article is indeed about a supply chain issue. If so it creates a structured report.
“””
prompt = insert_titles_in_text(prompt, reports)
intro = “”
try:
response = call_gpt_completion(prompt)
for choice in response.choices:
intro += choice.message.content
except Exception as e:
print(“An error occurred:”, str(e))
return intro
def generate_title(intro):
print(“Creating a title”)
prompt = “Create a title using the following text as a context:\n” + intro
title_text = “”
try:
response = call_gpt_completion(prompt)
for choice in response.choices:
title_text += choice.message.content
except Exception as e:
print(“An error occurred:”, str(e))
title_text = title_text.strip(” “).strip(‘\”‘)
if title_text.startswith(“Title:”): # Sometimes ChatGPT prefix the title with Title:
return title_text[len(“Title:”):]
return title_text
def create_word_doc(file_name, title_text, image_url, intro, reports):
print(“Saving to word document”)
doc = docx.Document()
# Add a title
title = doc.add_paragraph()
title.style = ‘Title’
title_run = title.add_run(title_text)
title_run.font.size = Pt(24) # Set the font size
title_run.font.name = ‘Arial (Body)’ # Set the font
title.alignment = WD_ALIGN_PARAGRAPH.CENTER # Center align the title
if len(image_url) > 0:
add_image_from_base64(doc, image_url)
doc.add_paragraph(intro)
# Add each report
for report in reports:
p = doc.add_paragraph(style=‘Heading 1’)
add_hyperlink(p, report[‘link’], report[‘title’])
doc.add_paragraph(f“Published on: {report[‘published’]}”)
html_to_word(doc, report[‘text’])
doc.add_paragraph(“””
“””)
for paragraph in doc.paragraphs:
for run in paragraph.runs:
run.font.name = ‘Arial (Body)’
# Save the document
doc.save(file_name)
def main():
image_url = generate_article_image()
filtered_articles = get_unique_posts_from_webz(
“””(sentiment:negative) AND ((“supply chain” OR “fulfillment center”) OR (title:(cargo OR warehouse OR shippers OR inventory OR suppliers OR fulfillment)) OR (thread.section_title:”supply chain”) OR (site:*supply*))”””)
reports = generate_reports(filtered_articles)
intro = generate_intro(reports)
title_text = generate_title(intro)
create_word_doc(“Supply-chain risk report digest.docx”, title_text, image_url, intro, reports)
if __name__ == “__main__”:
main()