from fastapi import APIRouter, HTTPException, Request, Depends
from fastapi.responses import RedirectResponse
from openai import OpenAI
import re
import boto3
import os
import time
import db_module
from schemas import ChatMessage, SpeechText, QuizMessage, LineUser, MedicineText, UserQuestion
from config import logger, openai_api_key
import httpx
import json
import pandas as pd
import PyPDF2
from sklearn.feature_extraction.text import TfidfVectorizer
import logging
import requests
from bs4 import BeautifulSoup
import urllib.parse
import numpy as np


router = APIRouter()

client = OpenAI(api_key=openai_api_key)

s3_client = boto3.client('s3')
bucket_name = 'shanri-ai-chatbot-for-text-to-speech'


async def ask_openai(messages):
    """Send a Chat Completions request and return the assistant's reply."""
    # Drop messages with empty content; the API rejects them.
    valid_messages = [msg for msg in messages if msg.get('content')]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=valid_messages
    )
    answer = response.choices[0].message.content.strip()
    return answer
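
# Example usage (sketch): `messages` follows the Chat Completions format,
# i.e. a list of {"role": ..., "content": ...} dicts:
#
#     answer = await ask_openai([
#         {"role": "system", "content": "You are a helpful assistant."},
#         {"role": "user", "content": "こんにちは"},
#     ])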


async def synthesize_speech(text, user_id):
    """Generate speech with OpenAI TTS, upload it to S3, and return its URL."""
    response = client.audio.speech.create(
        model="tts-1",
        voice="nova",
        input=text,
    )
    # Write the audio to a local temp file, then upload it to S3.
    os.makedirs("tmp", exist_ok=True)
    audio_file = f"tmp/audio-{user_id}-{time.time()}.mp3"
    with open(audio_file, 'wb') as f:
        for chunk in response.iter_bytes():
            f.write(chunk)
    s3_key = f"{user_id}-{time.time()}.mp3"
    s3_client.upload_file(audio_file, bucket_name, s3_key)
    os.remove(audio_file)
    return f"https://{bucket_name}.s3.amazonaws.com/{s3_key}"


# ✅ Crawl a specific website (ug-inc.net)
def fetch_website_data(url):
    """Fetch the paragraph text from a single URL.

    Synchronous on purpose: it uses blocking `requests` and is called from
    the synchronous crawl/answer pipeline below.
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        text = "\n".join([p.get_text() for p in paragraphs])
        return text
    except requests.RequestException as e:
        logger.warning(f"Failed to fetch website data: {e}")
        return ""


# ✅ Crawl the site including its internal pages
def fetch_all_pages(base_url):
    """Collect internal links from the homepage and crawl every page found."""
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(base_url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        # Keep only links that stay on the same site.
        links = {base_url}
        for a_tag in soup.find_all("a", href=True):
            href = a_tag["href"]
            full_url = urllib.parse.urljoin(base_url, href)
            if full_url.startswith(base_url):
                links.add(full_url)

        all_text = ""
        for url in links:
            logger.info(f"Crawling: {url}")
            all_text += fetch_website_data(url) + "\n\n"
        return all_text
    except requests.RequestException as e:
        logger.warning(f"Website crawl failed: {e}")
        return ""


# ✅ Extract the text most relevant to the question
def find_relevant_text(question, text_data, max_sentences=20, max_length=2000):
    """Keep only the top `max_sentences` lines most similar to the question."""
    sentences = [s for s in text_data.split("\n") if s.strip()]
    if not sentences:
        return ""
    # TfidfVectorizer L2-normalises each row by default, so the row dot
    # products below are cosine similarities.
    vectorizer = TfidfVectorizer().fit_transform([question] + sentences)
    similarities = (vectorizer * vectorizer.T).toarray()[0][1:]

    # Sort by similarity and keep only the top `max_sentences` lines.
    top_indices = np.argsort(similarities)[-max_sentences:]
    top_sentences = [sentences[i] for i in top_indices]
    return "\n".join(top_sentences)[:max_length]


# ✅ Ask GPT for an answer (site crawl + OpenAI web search)
def generate_gpt_answer(question):
    """Answer with GPT-4o using crawled ug-inc.net data plus web-search results."""
    website_data = fetch_all_pages("https://www.ug-inc.net/")
    relevant_text = find_relevant_text(question, website_data) if website_data else ""

    # ✅ Run an OpenAI web search via the Responses API
    web_search_response = client.responses.create(
        model="gpt-4o",
        tools=[{"type": "web_search_preview"}],
        input="Unite & Grow " + question
    )
    web_search_text = web_search_response.output_text or ""
    logger.debug(web_search_text)

    messages = [
        # "Look at the question, gather the relevant information, and write an answer."
        {"role": "system", "content": "与えられた質問を見て、関連情報を取得して回答を作成してください。"},
        # "Do not mention which sources were consulted."
        {"role": "system", "content": "回答に何を参照したかは言わないでください。"}
    ]

    if relevant_text:
        # "Use the following information from the company's official site."
        messages.append(
            {"role": "system", "content": f"以下の企業公式サイトの情報を参考にしてください。\n{relevant_text}"})

    if web_search_text:
        # "Use the following information from the web search."
        messages.append(
            {"role": "system", "content": f"以下のウェブ検索の情報を参考にしてください。\n{web_search_text}"})

    messages.append({"role": "user", "content": question})

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages
    )

    return response.choices[0].message.content


@router.get("/health")
async def health_check():
    return {"status": "healthy"}


@router.post("/api/speech")
async def speech(speech_text: SpeechText):
    text = speech_text.text
    chat_token = speech_text.chat_token
    if not text:
        raise HTTPException(status_code=400, detail="Text is required")
    audio_file = await synthesize_speech(text, chat_token)
    return {"audio_file": audio_file}


# API that receives a question and returns an answer
@router.post('/api/ask_question')
def ask_question(user_question: UserQuestion):
    # Declared as a plain `def` so FastAPI runs this blocking pipeline
    # (requests + synchronous OpenAI calls) in its threadpool rather than
    # on the event loop.
    question_text = user_question.question.strip()
    if not question_text:
        raise HTTPException(status_code=400, detail="Question is required")

    generated_answer = generate_gpt_answer(question_text)
    return {"answer": generated_answer}

