From e06647b4b0b0f09f1febb4c703d0569611b75112 Mon Sep 17 00:00:00 2001 From: Eric Ciarla Date: Thu, 12 Dec 2024 14:41:11 -0500 Subject: [PATCH] Move full app examples to other repo --- .../automated_price_tracking/.env.example | 2 - .../.github/workflows/check_prices.yml | 33 ----- examples/automated_price_tracking/.gitignore | 1 - examples/automated_price_tracking/README.md | 31 ---- .../automated_price_tracking/check_prices.py | 49 ------- examples/automated_price_tracking/database.py | 134 ------------------ .../automated_price_tracking/notifications.py | 36 ----- .../automated_price_tracking/requirements.txt | 9 -- examples/automated_price_tracking/scraper.py | 38 ----- examples/automated_price_tracking/ui.py | 86 ----------- examples/automated_price_tracking/utils.py | 28 ---- 11 files changed, 447 deletions(-) delete mode 100644 examples/automated_price_tracking/.env.example delete mode 100644 examples/automated_price_tracking/.github/workflows/check_prices.yml delete mode 100644 examples/automated_price_tracking/.gitignore delete mode 100644 examples/automated_price_tracking/README.md delete mode 100644 examples/automated_price_tracking/check_prices.py delete mode 100644 examples/automated_price_tracking/database.py delete mode 100644 examples/automated_price_tracking/notifications.py delete mode 100644 examples/automated_price_tracking/requirements.txt delete mode 100644 examples/automated_price_tracking/scraper.py delete mode 100644 examples/automated_price_tracking/ui.py delete mode 100644 examples/automated_price_tracking/utils.py diff --git a/examples/automated_price_tracking/.env.example b/examples/automated_price_tracking/.env.example deleted file mode 100644 index 4a9dbf9a..00000000 --- a/examples/automated_price_tracking/.env.example +++ /dev/null @@ -1,2 +0,0 @@ -FIRECRAWL_API_KEY= -POSTGRES_URL= \ No newline at end of file diff --git a/examples/automated_price_tracking/.github/workflows/check_prices.yml b/examples/automated_price_tracking/.github/workflows/check_prices.yml deleted file mode 100644 index 5bd0e671..00000000 --- a/examples/automated_price_tracking/.github/workflows/check_prices.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Price Check - -on: - schedule: - # Runs every 6 hours - - cron: "0 0,6,12,18 * * *" - workflow_dispatch: # Allows manual triggering - -jobs: - check-prices: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.10" - cache: "pip" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Run price checker - env: - FIRECRAWL_API_KEY: ${{ secrets.FIRECRAWL_API_KEY }} - POSTGRES_URL: ${{ secrets.POSTGRES_URL }} - DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_WEBHOOK_URL }} - run: python check_prices.py diff --git a/examples/automated_price_tracking/.gitignore b/examples/automated_price_tracking/.gitignore deleted file mode 100644 index 1d17dae1..00000000 --- a/examples/automated_price_tracking/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.venv diff --git a/examples/automated_price_tracking/README.md b/examples/automated_price_tracking/README.md deleted file mode 100644 index 9ab50dbe..00000000 --- a/examples/automated_price_tracking/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Automated Price Tracking System - -A robust price tracking system that monitors product prices across e-commerce websites and notifies users of price changes through Discord. - -## Features - -- Automated price checking every 6 hours -- Support for multiple e-commerce platforms through Firecrawl API -- Discord notifications for price changes -- Historical price data storage in PostgreSQL database -- Interactive price history visualization with Streamlit - -## Setup - -1. Clone the repository -2. Install dependencies: - - ```bash - pip install -r requirements.txt - ``` - -3. Configure environment variables: - - ```bash - cp .env.example .env - ``` - - Then edit `.env` with your: - - Discord webhook URL - - Database credentials - - Firecrawl API key diff --git a/examples/automated_price_tracking/check_prices.py b/examples/automated_price_tracking/check_prices.py deleted file mode 100644 index 33a48843..00000000 --- a/examples/automated_price_tracking/check_prices.py +++ /dev/null @@ -1,49 +0,0 @@ -import os -import asyncio -from database import Database -from dotenv import load_dotenv -from firecrawl import FirecrawlApp -from scraper import scrape_product -from notifications import send_price_alert - -load_dotenv() - -db = Database(os.getenv("POSTGRES_URL")) -app = FirecrawlApp() - -# Threshold percentage for price drop alerts (e.g., 5% = 0.05) -PRICE_DROP_THRESHOLD = 0.05 - - -async def check_prices(): - products = db.get_all_products() - product_urls = set(product.url for product in products) - - for product_url in product_urls: - # Get the price history - price_history = db.get_price_history(product_url) - if not price_history: - continue - - # Get the earliest recorded price - earliest_price = price_history[-1].price - - # Retrieve updated product data - updated_product = scrape_product(product_url) - current_price = updated_product["price"] - - # Add the price to the database - db.add_price(updated_product) - print(f"Added new price entry for {updated_product['name']}") - - # Check if price dropped below threshold - if earliest_price > 0: # Avoid division by zero - price_drop = (earliest_price - current_price) / earliest_price - if price_drop >= PRICE_DROP_THRESHOLD: - await send_price_alert( - updated_product["name"], earliest_price, current_price, product_url - ) - - -if __name__ == "__main__": - asyncio.run(check_prices()) diff --git a/examples/automated_price_tracking/database.py b/examples/automated_price_tracking/database.py deleted file mode 100644 index 2aec92a8..00000000 --- a/examples/automated_price_tracking/database.py +++ /dev/null @@ -1,134 +0,0 @@ -from sqlalchemy import create_engine, Column, String, Float, DateTime, ForeignKey -from sqlalchemy.orm import sessionmaker, relationship, declarative_base -from datetime import datetime - -Base = declarative_base() - - -class Product(Base): - __tablename__ = "products" - - url = Column(String, primary_key=True) - prices = relationship( - "PriceHistory", back_populates="product", cascade="all, delete-orphan" - ) - - -class PriceHistory(Base): - __tablename__ = "price_histories" - - id = Column(String, primary_key=True) - product_url = Column(String, ForeignKey("products.url")) - name = Column(String, nullable=False) - price = Column(Float, nullable=False) - currency = Column(String, nullable=False) - main_image_url = Column(String) - timestamp = Column(DateTime, nullable=False) - product = relationship("Product", back_populates="prices") - - -class Database: - def __init__(self, connection_string): - self.engine = create_engine(connection_string) - Base.metadata.create_all(self.engine) - self.Session = sessionmaker(bind=self.engine) - - def add_product(self, url): - session = self.Session() - try: - # Create the product entry - product = Product(url=url) - session.merge(product) # merge will update if exists, insert if not - session.commit() - finally: - session.close() - - def product_exists(self, url): - session = self.Session() - try: - return session.query(Product).filter(Product.url == url).first() is not None - finally: - session.close() - - def add_price(self, product_data): - session = self.Session() - try: - # First ensure the product exists - if not self.product_exists(product_data["url"]): - # Create the product if it doesn't exist - product = Product(url=product_data["url"]) - session.add(product) - session.flush() # Flush to ensure the product is created before adding price - - # Convert timestamp string to datetime if it's a string - timestamp = product_data["timestamp"] - if isinstance(timestamp, str): - timestamp = datetime.strptime(timestamp, "%Y-%m-%d %H-%M") - - price_history = PriceHistory( - id=f"{product_data['url']}_{timestamp.strftime('%Y%m%d%H%M%S')}", - product_url=product_data["url"], - name=product_data["name"], - price=product_data["price"], - currency=product_data["currency"], - main_image_url=product_data["main_image_url"], - timestamp=timestamp, - ) - session.add(price_history) - session.commit() - finally: - session.close() - - def get_all_products(self): - session = self.Session() - try: - return session.query(Product).all() - finally: - session.close() - - def get_price_history(self, url): - """Get price history for a product""" - session = self.Session() - try: - return ( - session.query(PriceHistory) - .filter(PriceHistory.product_url == url) - .order_by(PriceHistory.timestamp.desc()) - .all() - ) - finally: - session.close() - - def remove_all_products(self): - session = self.Session() - try: - # First delete all price histories - session.query(PriceHistory).delete() - # Then delete all products - session.query(Product).delete() - session.commit() - finally: - session.close() - - # def remove_product(self, url): - # """Remove a product and its price history""" - # session = self.Session() - # try: - # product = session.query(Product).filter(Product.url == url).first() - # if product: - # session.delete( - # product - # ) # This will also delete associated price history due to cascade - # session.commit() - # finally: - # session.close() - - -if __name__ == "__main__": - from dotenv import load_dotenv - import os - - load_dotenv() - - db = Database(os.getenv("POSTGRES_URL")) - db.remove_all_products() diff --git a/examples/automated_price_tracking/notifications.py b/examples/automated_price_tracking/notifications.py deleted file mode 100644 index 2837fb70..00000000 --- a/examples/automated_price_tracking/notifications.py +++ /dev/null @@ -1,36 +0,0 @@ -from dotenv import load_dotenv -import os -import aiohttp -import asyncio - -load_dotenv() - - -async def send_price_alert( - product_name: str, old_price: float, new_price: float, url: str -): - """Send a price drop alert to Discord""" - drop_percentage = ((old_price - new_price) / old_price) * 100 - - message = { - "embeds": [ - { - "title": "Price Drop Alert! 🎉", - "description": f"**{product_name}**\nPrice dropped by {drop_percentage:.1f}%!\n" - f"Old price: ${old_price:.2f}\n" - f"New price: ${new_price:.2f}\n" - f"[View Product]({url})", - "color": 3066993, - } - ] - } - - try: - async with aiohttp.ClientSession() as session: - await session.post(os.getenv("DISCORD_WEBHOOK_URL"), json=message) - except Exception as e: - print(f"Error sending Discord notification: {e}") - - -if __name__ == "__main__": - asyncio.run(send_price_alert("Test Product", 100, 90, "https://www.google.com")) diff --git a/examples/automated_price_tracking/requirements.txt b/examples/automated_price_tracking/requirements.txt deleted file mode 100644 index 52f0541b..00000000 --- a/examples/automated_price_tracking/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -streamlit -firecrawl-py -pydantic -psycopg2-binary -python-dotenv -sqlalchemy==2.0.35 -pandas -plotly -aiohttp \ No newline at end of file diff --git a/examples/automated_price_tracking/scraper.py b/examples/automated_price_tracking/scraper.py deleted file mode 100644 index fc06b73e..00000000 --- a/examples/automated_price_tracking/scraper.py +++ /dev/null @@ -1,38 +0,0 @@ -from firecrawl import FirecrawlApp -from pydantic import BaseModel, Field -from datetime import datetime -from dotenv import load_dotenv - -load_dotenv() -app = FirecrawlApp() - - -class Product(BaseModel): - """Schema for creating a new product""" - - url: str = Field(description="The URL of the product") - name: str = Field(description="The product name/title") - price: float = Field(description="The current price of the product") - currency: str = Field(description="Currency code (USD, EUR, etc)") - main_image_url: str = Field(description="The URL of the main image of the product") - - -def scrape_product(url: str): - extracted_data = app.scrape_url( - url, - params={ - "formats": ["extract"], - "extract": {"schema": Product.model_json_schema()}, - }, - ) - - # Add the scraping date to the extracted data - extracted_data["extract"]["timestamp"] = datetime.utcnow() - - return extracted_data["extract"] - - -if __name__ == "__main__": - product = "https://www.amazon.com/gp/product/B002U21ZZK/" - - print(scrape_product(product)) diff --git a/examples/automated_price_tracking/ui.py b/examples/automated_price_tracking/ui.py deleted file mode 100644 index 11969897..00000000 --- a/examples/automated_price_tracking/ui.py +++ /dev/null @@ -1,86 +0,0 @@ -import os -import streamlit as st -import pandas as pd -import plotly.express as px - -from utils import is_valid_url -from database import Database -from dotenv import load_dotenv -from scraper import scrape_product - -load_dotenv() - -st.set_page_config(page_title="Price Tracker", page_icon="📊", layout="wide") - -with st.spinner("Loading database..."): - db = Database(os.getenv("POSTGRES_URL")) - - -# Set up sidebar -with st.sidebar: - st.title("Add New Product") - product_url = st.text_input("Product URL") - add_button = st.button("Add Product") - - if add_button: - if not product_url: - st.error("Please enter a product URL") - elif not is_valid_url(product_url): - st.error("Please enter a valid URL") - else: - db.add_product(product_url) - with st.spinner("Added product to database. Scraping product data..."): - product_data = scrape_product(product_url) - db.add_price(product_data) - st.success("Product is now being tracked!") - -# Main content -st.title("Price Tracker Dashboard") -st.markdown("## Tracked Products") - -# Get all products and their price histories -products = db.get_all_products() - -# Create a card for each product -for product in products: - price_history = db.get_price_history(product.url) - if price_history: - # Create DataFrame for plotting - df = pd.DataFrame( - [ - {"timestamp": ph.timestamp, "price": ph.price, "name": ph.name} - for ph in price_history - ] - ) - - # Create a card-like container for each product - with st.expander(df["name"][0], expanded=False): - st.markdown("---") - col1, col2 = st.columns([1, 3]) - - with col1: - if price_history[0].main_image_url: - st.image(price_history[0].main_image_url, width=200) - st.metric( - label="Current Price", - value=f"{price_history[0].price} {price_history[0].currency}", - ) - - with col2: - # Create price history plot - fig = px.line( - df, - x="timestamp", - y="price", - title=None, - ) - fig.update_layout( - xaxis_title=None, - yaxis_title="Price ($)", - showlegend=False, - margin=dict(l=0, r=0, t=0, b=0), - height=300, - ) - fig.update_xaxes(tickformat="%Y-%m-%d %H:%M", tickangle=45) - fig.update_yaxes(tickprefix="$", tickformat=".2f") - st.plotly_chart(fig, use_container_width=True) diff --git a/examples/automated_price_tracking/utils.py b/examples/automated_price_tracking/utils.py deleted file mode 100644 index c7af0a94..00000000 --- a/examples/automated_price_tracking/utils.py +++ /dev/null @@ -1,28 +0,0 @@ -from urllib.parse import urlparse -import re - - -def is_valid_url(url: str) -> bool: - try: - # Parse the URL - result = urlparse(url) - - # Check if scheme and netloc are present - if not all([result.scheme, result.netloc]): - return False - - # Check if scheme is http or https - if result.scheme not in ["http", "https"]: - return False - - # Basic regex pattern for domain validation - domain_pattern = ( - r"^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z]{2,})+$" - ) - if not re.match(domain_pattern, result.netloc): - return False - - return True - - except Exception: - return False