import sqlite3 from datetime import datetime, date from typing import List, Optional from pydantic import BaseModel DB_NAME = "news.db" class Article(BaseModel): title: str url: str image_url: Optional[str] = None published_date: Optional[str] = None category: str source: str = "Yahoo Japan" collected_at: str = datetime.now().isoformat() content: Optional[str] = None def init_db(): conn = sqlite3.connect(DB_NAME) c = conn.cursor() c.execute(''' CREATE TABLE IF NOT EXISTS articles ( id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT NOT NULL, url TEXT UNIQUE NOT NULL, image_url TEXT, published_date TEXT, category TEXT, source TEXT, collected_at TEXT, content TEXT ) ''') conn.commit() conn.close() def save_article(article: Article): conn = sqlite3.connect(DB_NAME) c = conn.cursor() try: # Check if content column exists (for migration) cursor = c.execute("PRAGMA table_info(articles)") columns = [info[1] for info in cursor.fetchall()] if 'content' not in columns: c.execute("ALTER TABLE articles ADD COLUMN content TEXT") conn.commit() c.execute(''' INSERT INTO articles (title, url, image_url, published_date, category, source, collected_at, content) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(url) DO UPDATE SET content = excluded.content, image_url = excluded.image_url, published_date = excluded.published_date ''', (article.title, article.url, article.image_url, article.published_date, article.category, article.source, article.collected_at, article.content)) conn.commit() except Exception as e: print(f"Error saving article: {e}") finally: conn.close() def get_articles(category: Optional[str] = None, collection_date: Optional[str] = None, limit: int = 5) -> List[dict]: conn = sqlite3.connect(DB_NAME) conn.row_factory = sqlite3.Row c = conn.cursor() # Filter out articles without content query = "SELECT * FROM articles WHERE content IS NOT NULL AND content != '' AND content != 'Content not found.'" params = [] if category: query += " AND category = ?" params.append(category) if collection_date: # Assuming collection_date is 'YYYY-MM-DD' and collected_at is ISO format query += " AND date(collected_at) = ?" params.append(collection_date) else: # Default to today if no date specified? Or just get latest? # User said "collect news based on today". # But for viewing, maybe we just want the latest batch. # Let's order by collected_at desc pass query += " ORDER BY collected_at DESC LIMIT ?" params.append(limit) c.execute(query, tuple(params)) rows = c.fetchall() conn.close() return [dict(row) for row in rows] def get_available_dates() -> List[str]: conn = sqlite3.connect(DB_NAME) c = conn.cursor() # Extract distinct dates YYYY-MM-DD c.execute("SELECT DISTINCT date(collected_at) as date_val FROM articles ORDER BY date_val DESC") rows = c.fetchall() conn.close() return [row[0] for row in rows if row[0]]