- Add docker-compose.yml for easy deployment - Configure DB_PATH environment variable for flexible database location - Mount ./data volume for data persistence across container restarts 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
105 lines
3.4 KiB
Python
105 lines
3.4 KiB
Python
import sqlite3
|
|
import os
|
|
from datetime import datetime, date
|
|
from typing import List, Optional
|
|
from pydantic import BaseModel
|
|
|
|
DB_NAME = os.environ.get("DB_PATH", "news.db")
|
|
|
|
class Article(BaseModel):
|
|
title: str
|
|
url: str
|
|
image_url: Optional[str] = None
|
|
published_date: Optional[str] = None
|
|
category: str
|
|
source: str = "Yahoo Japan"
|
|
collected_at: str = datetime.now().isoformat()
|
|
content: Optional[str] = None
|
|
|
|
def init_db():
|
|
conn = sqlite3.connect(DB_NAME)
|
|
c = conn.cursor()
|
|
c.execute('''
|
|
CREATE TABLE IF NOT EXISTS articles (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
title TEXT NOT NULL,
|
|
url TEXT UNIQUE NOT NULL,
|
|
image_url TEXT,
|
|
published_date TEXT,
|
|
category TEXT,
|
|
source TEXT,
|
|
collected_at TEXT,
|
|
content TEXT
|
|
)
|
|
''')
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
def save_article(article: Article):
|
|
conn = sqlite3.connect(DB_NAME)
|
|
c = conn.cursor()
|
|
try:
|
|
# Check if content column exists (for migration)
|
|
cursor = c.execute("PRAGMA table_info(articles)")
|
|
columns = [info[1] for info in cursor.fetchall()]
|
|
if 'content' not in columns:
|
|
c.execute("ALTER TABLE articles ADD COLUMN content TEXT")
|
|
conn.commit()
|
|
|
|
c.execute('''
|
|
INSERT INTO articles (title, url, image_url, published_date, category, source, collected_at, content)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
ON CONFLICT(url) DO UPDATE SET
|
|
content = excluded.content,
|
|
image_url = excluded.image_url,
|
|
published_date = excluded.published_date
|
|
''', (article.title, article.url, article.image_url, article.published_date, article.category, article.source, article.collected_at, article.content))
|
|
conn.commit()
|
|
except Exception as e:
|
|
print(f"Error saving article: {e}")
|
|
finally:
|
|
conn.close()
|
|
|
|
def get_articles(category: Optional[str] = None, collection_date: Optional[str] = None, limit: int = 5) -> List[dict]:
|
|
conn = sqlite3.connect(DB_NAME)
|
|
conn.row_factory = sqlite3.Row
|
|
c = conn.cursor()
|
|
|
|
# Filter out articles without content
|
|
query = "SELECT * FROM articles WHERE content IS NOT NULL AND content != '' AND content != 'Content not found.'"
|
|
params = []
|
|
|
|
if category:
|
|
query += " AND category = ?"
|
|
params.append(category)
|
|
|
|
if collection_date:
|
|
# Assuming collection_date is 'YYYY-MM-DD' and collected_at is ISO format
|
|
query += " AND date(collected_at) = ?"
|
|
params.append(collection_date)
|
|
else:
|
|
# Default to today if no date specified? Or just get latest?
|
|
# User said "collect news based on today".
|
|
# But for viewing, maybe we just want the latest batch.
|
|
# Let's order by collected_at desc
|
|
pass
|
|
|
|
query += " ORDER BY collected_at DESC LIMIT ?"
|
|
params.append(limit)
|
|
|
|
c.execute(query, tuple(params))
|
|
rows = c.fetchall()
|
|
conn.close()
|
|
|
|
|
|
return [dict(row) for row in rows]
|
|
|
|
def get_available_dates() -> List[str]:
|
|
conn = sqlite3.connect(DB_NAME)
|
|
c = conn.cursor()
|
|
# Extract distinct dates YYYY-MM-DD
|
|
c.execute("SELECT DISTINCT date(collected_at) as date_val FROM articles ORDER BY date_val DESC")
|
|
rows = c.fetchall()
|
|
conn.close()
|
|
return [row[0] for row in rows if row[0]]
|