#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import requests
import json
import time
import logging
from urllib.parse import parse_qs, urlparse
from config import USER_AGENT

# Logging configuration: module-level logger named after this module.
logger = logging.getLogger(__name__)

# Domains accepted as Terabox hosts. Subdomains (``www.``, ``dm.``, ...) of
# these are accepted too. ``4funbox.co`` and ``1024tera.co`` are kept because
# the previous pattern list accepted them; ``1024terabox.com`` is the host the
# scraping fallback in this module rewrites share links to.
_TERABOX_DOMAINS = frozenset({
    "1024tera.co",
    "1024tera.com",
    "1024terabox.com",
    "4funbox.co",
    "4funbox.com",
    "freeterabox.com",
    "mirrobox.com",
    "momerybox.com",
    "nephobox.com",
    "terabox.app",
    "terabox.com",
    "teraboxapp.com",
    "tibibox.com",
})


def check_url_patterns(url):
    """
    Check whether a URL points at a known Terabox host.

    Only the host part of the URL is inspected, so a Terabox domain that
    merely appears in the path or query string of an unrelated URL does not
    count. The comparison is case-insensitive and accepts subdomains.

    Args:
        url (str): A single URL to check, with or without a scheme.

    Returns:
        bool: True if the URL's host is a known Terabox domain (or a
              subdomain of one), False otherwise (including non-string or
              empty input).
    """
    if not isinstance(url, str):
        return False

    candidate = url.strip()
    if not candidate:
        return False

    # urlparse only recognises the host when a scheme or a leading "//" is
    # present, so scheme-less input such as "terabox.com/s/1abc" is prefixed.
    if not re.match(r"[A-Za-z][A-Za-z0-9+.\-]*://", candidate):
        candidate = "//" + candidate

    try:
        # .hostname is lower-cased and excludes userinfo and port.
        host = urlparse(candidate).hostname
    except ValueError:
        # Raised for malformed netlocs such as an unterminated IPv6 literal.
        return False

    if not host:
        return False

    host = host.rstrip(".")  # tolerate a fully-qualified trailing dot
    return any(
        host == domain or host.endswith("." + domain)
        for domain in _TERABOX_DOMAINS
    )

def get_formatted_size(size_in_bytes):
    """
    Render a byte count as a short human-readable string.

    Values below 1024 are shown as-is with a "B" suffix; larger values are
    scaled to KB, MB or GB (binary multiples) with two decimals. GB is the
    largest unit, so anything of 1 GB or more is expressed in GB.

    Args:
        size_in_bytes (int): Size in bytes.

    Returns:
        str: The formatted size, e.g. "512 B", "1.50 KB", "2.00 GB".
    """
    kib = 1024
    mib = kib * 1024
    gib = mib * 1024

    if size_in_bytes < kib:
        return f"{size_in_bytes} B"

    # (exclusive upper bound, divisor, unit label), smallest unit first.
    scaled_units = (
        (mib, kib, "KB"),
        (gib, mib, "MB"),
    )
    for upper_bound, divisor, label in scaled_units:
        if size_in_bytes < upper_bound:
            return f"{size_in_bytes / divisor:.2f} {label}"

    return f"{size_in_bytes / gib:.2f} GB"

def extract_terabox_url(text):
    """
    Find the first Terabox URL inside a piece of text.

    Every whitespace-delimited http(s) URL in the text is tested with
    check_url_patterns, in order of appearance.

    Args:
        text (str): Text that may contain a Terabox URL.

    Returns:
        str or None: The first URL accepted by check_url_patterns, or None
                     if no URL in the text qualifies.
    """
    found = re.finditer(r"(https?://\S+)", text)
    return next(
        (m.group(1) for m in found if check_url_patterns(m.group(1))),
        None,
    )

# Multipliers for the unit suffixes matched by the size regex below.
_SIZE_UNIT_FACTORS = {
    "B": 1,
    "KB": 1024,
    "MB": 1024 ** 2,
    "GB": 1024 ** 3,
    "TB": 1024 ** 4,
}


def _parse_size_to_bytes(size):
    """Convert an API-reported size (number or text like "1.5 MB") to bytes, or None."""
    if isinstance(size, bool):
        return None
    if isinstance(size, (int, float)):
        try:
            return int(size)
        except (ValueError, OverflowError):
            # NaN / infinity cannot be converted to an integer byte count.
            return None
    if not isinstance(size, str):
        return None

    size_match = re.search(r'([\d.]+)\s*([KMGT]?B)', size)
    if not size_match:
        return None
    try:
        size_num = float(size_match.group(1))
    except ValueError:
        # The numeric group also matches malformed text such as "1.2.3".
        return None
    return int(size_num * _SIZE_UNIT_FACTORS[size_match.group(2)])


def _fetch_from_primary_api(url):
    """Ask terabox-dl-api.vercel.app for a direct link; return a file-info dict or None."""
    headers = {
        "User-Agent": USER_AGENT,
        "Accept": "application/json, text/plain, */*",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            "https://terabox-dl-api.vercel.app/api/getDownloadLink",
            headers=headers,
            json={"url": url},
            timeout=30,
        )
    except Exception as e:
        # A network failure here must not abort the whole lookup; the caller
        # moves on to the next method.
        logger.error(f"Error with primary API: {str(e)}")
        return None

    logger.info(f"API Response Status: {response.status_code}")
    if response.status_code != 200:
        return None

    try:
        data = response.json()
        logger.info(f"API Response Data: {json.dumps(data, indent=2)}")

        if data.get("status") != "success":
            return None

        file_info = data.get("data", {})
        direct_link = file_info.get("downloadLink")
        if not direct_link:
            # "success" without a link is useless to the caller; try the
            # next method instead of returning a result with link=None.
            return None

        file_size = file_info.get("size")
        return {
            "file_name": file_info.get("filename", "terabox_file"),
            "link": direct_link,
            "direct_link": direct_link,
            "thumb": file_info.get("thumbnail"),
            "size": file_size or "Unknown",
            "size_bytes": _parse_size_to_bytes(file_size) if file_size else None,
        }
    except Exception as e:
        logger.error(f"Error parsing API response: {str(e)}")
        return None


def _fetch_from_alternative_api(url):
    """Create and poll a teraboxdownloader.com task; return a file-info dict or None."""
    headers = {
        "User-Agent": USER_AGENT,
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "application/json, text/plain, */*",
        "Origin": "https://teraboxdownloader.com",
        "Referer": "https://teraboxdownloader.com/",
    }

    try:
        response = requests.post(
            "https://teraboxdownloader.com/api/task/create",
            headers=headers,
            data={"url": url},
            timeout=30,
        )
    except Exception as e:
        logger.error(f"Error with alternative API: {str(e)}")
        return None

    logger.info(f"Alternative API Response Status: {response.status_code}")
    if response.status_code != 200:
        return None

    try:
        result = response.json()
        logger.info(f"Alternative API Response: {json.dumps(result, indent=2)}")

        if result.get("status") != "success":
            return None
        task_id = result.get("data", {}).get("task_id")
        if not task_id:
            return None

        # Give the service a moment to process the task before polling it.
        time.sleep(2)

        result_response = requests.get(
            f"https://teraboxdownloader.com/api/task/{task_id}",
            headers=headers,
            timeout=30,
        )
        if result_response.status_code != 200:
            return None

        result_data = result_response.json()
        logger.info(f"Task Result: {json.dumps(result_data, indent=2)}")
        if result_data.get("status") != "success":
            return None

        file_data = result_data.get("data", {})
        direct_link = file_data.get("direct_download_link")
        if not direct_link:
            # Same rule as the primary API: no link means no result.
            return None

        file_size = file_data.get("file_size", "Unknown")
        return {
            "file_name": file_data.get("file_name", "terabox_file"),
            "link": direct_link,
            "direct_link": direct_link,
            "thumb": file_data.get("thumbnail"),
            "size": file_size,
            "size_bytes": _parse_size_to_bytes(file_size),
        }
    except Exception as e:
        logger.error(f"Error processing alternative API response: {str(e)}")
        return None


def _to_scrape_url(url):
    """Return url with only its host swapped for 1024terabox.com (unchanged if it has no host)."""
    try:
        parts = urlparse(url)
    except ValueError:
        return url
    if not parts.netloc:
        # str.replace("", x) would insert x between every character, so a
        # URL without a recognisable host is left untouched.
        return url
    return parts._replace(netloc="1024terabox.com").geturl()


def _scrape_share_page(page_url):
    """Fetch the share page and extract name/thumbnail; return a file-info dict or None."""
    headers = {
        "User-Agent": USER_AGENT,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
    }

    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        logger.info(f"Scraping Response Status: {response.status_code}")
        if response.status_code != 200:
            return None

        file_name_match = re.search(r'<title>(.*?) - Terabox</title>', response.text)
        thumbnail_match = re.search(r'og:image" content="([^"]+)"', response.text)

        # Simple scraping cannot produce a direct download link, so the page
        # URL itself is returned and the result is flagged as premium-only.
        return {
            "file_name": file_name_match.group(1) if file_name_match else "terabox_file",
            "link": page_url,
            "direct_link": page_url,
            "thumb": thumbnail_match.group(1) if thumbnail_match else None,
            "size": "Unknown",
            "size_bytes": None,
            "requires_premium": True,
        }
    except Exception as e:
        logger.error(f"Error scraping page: {str(e)}")
        return None


def get_terabox_download_link(url):
    """
    Resolve a Terabox share URL to file information and a download link.

    Three methods are tried in order, each one only if the previous one
    produced no result:

    1. the terabox-dl-api.vercel.app JSON API,
    2. the teraboxdownloader.com task API,
    3. scraping the share page (host rewritten to 1024terabox.com) for the
       file name and thumbnail only.

    A failure in one method (network error, bad status, unexpected payload)
    is logged and does not prevent the remaining methods from running.

    Args:
        url (str): Terabox share URL.

    Returns:
        dict: File information with the keys "file_name", "link",
              "direct_link", "thumb", "size" and "size_bytes". Results from
              method 3 and the final fallback additionally carry
              "requires_premium": True, and the final fallback also carries
              a "message" explaining that no direct link could be extracted.
              None is returned only if an unexpected error escapes all of
              the per-method handling.
    """
    try:
        logger.info(f"Processing URL: {url}")

        for fetch in (_fetch_from_primary_api, _fetch_from_alternative_api):
            file_info = fetch(url)
            if file_info is not None:
                return file_info

        page_url = _to_scrape_url(url)
        file_info = _scrape_share_page(page_url)
        if file_info is not None:
            return file_info

        # Every method failed: return basic information only.
        return {
            "file_name": "terabox_file",
            "link": page_url,
            "direct_link": page_url,
            "thumb": None,
            "size": "Unknown",
            "size_bytes": None,
            "requires_premium": True,
            "message": "Tidak dapat mengekstrak link download langsung. File ini mungkin memerlukan akses premium atau URL tidak valid."
        }

    except Exception as e:
        logger.error(f"Error getting Terabox download link: {str(e)}")
        return None
