Files
collect-japan-news/database.py
kihong.kim 581ea49a75 Add docker-compose support with data persistence
- Add docker-compose.yml for easy deployment
- Configure DB_PATH environment variable for flexible database location
- Mount ./data volume for data persistence across container restarts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 16:14:06 +09:00

105 lines
3.4 KiB
Python

import sqlite3
import os
from datetime import datetime, date
from typing import List, Optional
from pydantic import BaseModel
DB_NAME = os.environ.get("DB_PATH", "news.db")
class Article(BaseModel):
title: str
url: str
image_url: Optional[str] = None
published_date: Optional[str] = None
category: str
source: str = "Yahoo Japan"
collected_at: str = datetime.now().isoformat()
content: Optional[str] = None
def init_db():
conn = sqlite3.connect(DB_NAME)
c = conn.cursor()
c.execute('''
CREATE TABLE IF NOT EXISTS articles (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
url TEXT UNIQUE NOT NULL,
image_url TEXT,
published_date TEXT,
category TEXT,
source TEXT,
collected_at TEXT,
content TEXT
)
''')
conn.commit()
conn.close()
def save_article(article: Article):
conn = sqlite3.connect(DB_NAME)
c = conn.cursor()
try:
# Check if content column exists (for migration)
cursor = c.execute("PRAGMA table_info(articles)")
columns = [info[1] for info in cursor.fetchall()]
if 'content' not in columns:
c.execute("ALTER TABLE articles ADD COLUMN content TEXT")
conn.commit()
c.execute('''
INSERT INTO articles (title, url, image_url, published_date, category, source, collected_at, content)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(url) DO UPDATE SET
content = excluded.content,
image_url = excluded.image_url,
published_date = excluded.published_date
''', (article.title, article.url, article.image_url, article.published_date, article.category, article.source, article.collected_at, article.content))
conn.commit()
except Exception as e:
print(f"Error saving article: {e}")
finally:
conn.close()
def get_articles(category: Optional[str] = None, collection_date: Optional[str] = None, limit: int = 5) -> List[dict]:
conn = sqlite3.connect(DB_NAME)
conn.row_factory = sqlite3.Row
c = conn.cursor()
# Filter out articles without content
query = "SELECT * FROM articles WHERE content IS NOT NULL AND content != '' AND content != 'Content not found.'"
params = []
if category:
query += " AND category = ?"
params.append(category)
if collection_date:
# Assuming collection_date is 'YYYY-MM-DD' and collected_at is ISO format
query += " AND date(collected_at) = ?"
params.append(collection_date)
else:
# Default to today if no date specified? Or just get latest?
# User said "collect news based on today".
# But for viewing, maybe we just want the latest batch.
# Let's order by collected_at desc
pass
query += " ORDER BY collected_at DESC LIMIT ?"
params.append(limit)
c.execute(query, tuple(params))
rows = c.fetchall()
conn.close()
return [dict(row) for row in rows]
def get_available_dates() -> List[str]:
conn = sqlite3.connect(DB_NAME)
c = conn.cursor()
# Extract distinct dates YYYY-MM-DD
c.execute("SELECT DISTINCT date(collected_at) as date_val FROM articles ORDER BY date_val DESC")
rows = c.fetchall()
conn.close()
return [row[0] for row in rows if row[0]]