"""Scrape a project's company website and store the result in DynamoDB.

The script loads the project stored under the key ``"tequity_1"``, runs a
sitemap scrape (with summaries enabled) against the project's
``company_url``, and writes one item per scraped URL to the web-pages
table.  A final item with the URL ``"images"`` carries the image URLs
collected by the scrape.
"""
import sys

# Must stay ahead of the project-local imports below: they are resolved
# through this extra path entry (presumably the script is launched from the
# repository root -- TODO confirm).
sys.path.append(".")

from services.ppt_generator.data_classes.project import Project
from utils.dynamo_db import DynamoDB
from utils.webscrape.sitemap_scrape import SitemapScrape

db = DynamoDB()

# Fetch the stored project record and turn it into a Project instance.
project = Project(**db.get_item(db.projects, "tequity_1"))

# Scrape the website
scrape = SitemapScrape(project.company_url, summarize=True)

# Upload the website to DynamoDB, one item per scraped URL.
pages = scrape.urls_scraped
for url in pages:
    page = pages[url]
    page_record = {
        "project_id": project.project_id,
        "url": url,
        # The scraped text is a sequence of strings; store it newline-joined.
        "text": "\n".join(page['text']),
        "summary": page['summary'],
    }
    db.upload_to_dynamodb(db.web_pages, page_record)

# The image URLs go into the same table under the placeholder URL "images".
image_record = {
    "project_id": project.project_id,
    "url": "images",
    "text": "images",
    "images": scrape.image_urls,
}
db.upload_to_dynamodb(db.web_pages, image_record)