import csv
import pymysql
import re
import time
from datetime import datetime, timedelta

# --- Настройки подключения к БД ---
conn = pymysql.connect(
    host='localhost',
    user='b2b_leads',
    password='v01lEHeGIdJUXSjQ',
    database='b2b_leads',
    charset='utf8mb4'
)
cursor = conn.cursor()

# --- Кэш для ускорения ---
okved_cache = {}
category_cache = {}
brand_cache = {}
phone_cache = {}
email_cache = {}
company_cache = {}

# --- Вспомогательные функции ---
def normalize_phone(phone):
    digits = re.sub(r'\D', '', phone)
    if digits.startswith('89'):
        digits = '7' + digits[1:]
    elif digits.startswith('9') and len(digits) == 10:
        digits = '7' + digits
    return digits if len(digits) >= 10 else None

def normalize_email(email):
    email = email.strip().lower()
    return email if re.match(r"[^@\s]+@[^@\s]+\.[^@\s]+", email) else None

def parse_human_date(date_str):
    months = {
        'января': '01', 'февраля': '02', 'марта': '03', 'апреля': '04',
        'мая': '05', 'июня': '06', 'июля': '07', 'августа': '08',
        'сентября': '09', 'октября': '10', 'ноября': '11', 'декабря': '12'
    }
    try:
        parts = date_str.strip().replace('года', '').split()
        if len(parts) == 3:
            day, month_rus, year = parts
            return f"{year}-{months[month_rus]:0>2}-{int(day):02d}"
    except:
        return None
    return None

def insert_or_get_id(table, column, value, cache):
    if value in cache:
        return cache[value]
    for _ in range(3):
        try:
            cursor.execute(f"SELECT id FROM {table} WHERE {column} = %s", (value,))
            result = cursor.fetchone()
            if result:
                cache[value] = result[0]
                return result[0]
            cursor.execute(f"INSERT INTO {table} ({column}) VALUES (%s)", (value,))
            cache[value] = cursor.lastrowid
            return cursor.lastrowid
        except pymysql.err.OperationalError as e:
            if e.args[0] == 1213:
                print(f"Deadlock on insert_or_get_id for {table}.{column}='{value}', retrying...")
                time.sleep(0.5)
                continue
            else:
                raise

def link_m2m(link_table, left_id_name, right_id_name, left_id, right_id):
    for _ in range(3):
        try:
            cursor.execute(f"REPLACE INTO {link_table} ({left_id_name}, {right_id_name}) VALUES (%s, %s)", (left_id, right_id))
            return
        except pymysql.err.OperationalError as e:
            if e.args[0] == 1213:
                print(f"Deadlock on linking {link_table}, retrying...")
                time.sleep(0.5)
                continue
            else:
                raise

# --- Загрузка CSV ---
rows = []
with open('VB3.csv', newline='', encoding='utf-8-sig') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=';')
    rows = list(reader)

# --- Прогрессбар с оценкой времени ---
def print_progress(stage, i, total, start_time):
    elapsed = time.time() - start_time
    avg_per_row = elapsed / (i + 1)
    remaining = avg_per_row * (total - i - 1)
    eta = str(timedelta(seconds=int(remaining)))
    print(f"[{stage}] Обработано строк: {i}/{total}, ETA: {eta}")

# --- Этап 1: Компании и ОКВЭДы ---
start1 = time.time()
total = len(rows)
for i, row in enumerate(rows):
    try:
        company_name = row.get('Наименование организации на Wildberries', '').strip()
        inn = row.get('ИНН', '').strip()
        if not inn:
            continue

        ogrn = row.get('ОГРН', '').strip()
        ogrn = '{:.0f}'.format(float(ogrn.replace(',', '.'))) if 'E+' in ogrn else ogrn
        reg_date = parse_human_date(row.get('Дата регистрации', '')) if row.get('Дата регистрации') else None
        region = row.get('Регион', '').strip()
        activity = row.get('Отрасль', '').strip()
        activity_code = row.get('Код ОКВЭД', '').strip()

        cursor.execute("""
            INSERT INTO companies (name, inn, ogrn, registration_date, region)
            VALUES (%s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                name = VALUES(name),
                ogrn = VALUES(ogrn),
                registration_date = VALUES(registration_date),
                region = VALUES(region),
                id=LAST_INSERT_ID(id)
        """, (company_name, inn, ogrn, reg_date, region))
        company_id = cursor.lastrowid
        company_cache[inn] = company_id

        if activity_code:
            okved_id = insert_or_get_id('okved', 'code', activity_code, okved_cache)
            cursor.execute("UPDATE okved SET description = %s WHERE id = %s", (activity, okved_id))
            link_m2m('company_okveds', 'company_id', 'okved_id', company_id, okved_id)

        if i % 200 == 0:
            print_progress("1/3", i, total, start1)
            conn.commit()

    except Exception as e:
        print(f"[1/3] Ошибка в строке {i}: {e}")
        conn.rollback()
conn.commit()

# --- Этап 2: Селлеры ---
start2 = time.time()
for i, row in enumerate(rows):
    try:
        inn = row.get('ИНН', '').strip()
        if not inn or inn not in company_cache:
            continue

        company_id = company_cache[inn]
        wb_url = row.get('Страница на сайте Wildberries', '').strip()
        seller_reg_date = parse_human_date(row.get('Дата регистрации на Wildberries', ''))
        rating = row.get('Рейтинг продавца', '').strip() or None
        reviews = row.get('Количество отзывов', '').strip() or None
        sold = row.get('Проданных товаров', '').strip() or None

        cursor.execute("SELECT id FROM marketplace_sellers WHERE platform = 'wb' AND wb_url = %s", (wb_url,))
        result = cursor.fetchone()

        if result:
            seller_id = result[0]
            cursor.execute("""
                UPDATE marketplace_sellers SET
                    company_id = %s,
                    registration_on_wb = IF(%s != '', %s, registration_on_wb),
                    rating = IF(%s IS NOT NULL, %s, rating),
                    reviews_count = IF(%s IS NOT NULL, %s, reviews_count),
                    products_sold = IF(%s IS NOT NULL, %s, products_sold)
                WHERE id = %s
            """, (company_id, seller_reg_date, seller_reg_date, rating, rating, reviews, reviews, sold, sold, seller_id))
        else:
            cursor.execute("""
                INSERT INTO marketplace_sellers (company_id, platform, wb_url, registration_on_wb, rating, reviews_count, products_sold)
                VALUES (%s, 'wb', %s, %s, %s, %s, %s)
            """, (company_id, wb_url, seller_reg_date, rating, reviews, sold))
            seller_id = cursor.lastrowid

        categories_raw = row.get('Разделы товаров', '').strip()
        if categories_raw:
            for cat in categories_raw.split(';'):
                cat_name = cat.strip()
                if cat_name:
                    cat_id = insert_or_get_id('product_categories', 'name', cat_name, category_cache)
                    link_m2m('seller_product_categories', 'seller_id', 'category_id', seller_id, cat_id)

        brands_raw = row.get('Бренды продавца', '').strip()
        if brands_raw:
            for brand in brands_raw.split(';'):
                brand_name = brand.strip()
                if brand_name:
                    brand_id = insert_or_get_id('brands', 'name', brand_name, brand_cache)
                    link_m2m('seller_brands', 'seller_id', 'brand_id', seller_id, brand_id)

        if i % 200 == 0:
            print_progress("2/3", i, total, start2)
            conn.commit()

    except Exception as e:
        print(f"[2/3] Ошибка в строке {i}: {e}")
        conn.rollback()
conn.commit()

# --- Этап 3: Контакты, телефоны, email, сайты ---
start3 = time.time()
for i, row in enumerate(rows):
    try:
        inn = row.get('ИНН', '').strip()
        if not inn or inn not in company_cache:
            continue

        company_id = company_cache[inn]
        contact_name = row.get('Наименование организации на Wildberries', '').strip()

        cursor.execute("SELECT id FROM contacts WHERE entity_type = 'company' AND entity_id = %s AND name = %s", (company_id, contact_name))
        contact = cursor.fetchone()
        if contact:
            contact_id = contact[0]
        else:
            cursor.execute("INSERT INTO contacts (entity_type, entity_id, name) VALUES ('company', %s, %s)", (company_id, contact_name))
            contact_id = cursor.lastrowid

        phones_raw = row.get('Телефоны', '').strip()
        for phone in phones_raw.split(','):
            norm_phone = normalize_phone(phone)
            if norm_phone:
                phone_id = insert_or_get_id('phones', 'number', norm_phone, phone_cache)
                link_m2m('contact_phones', 'contact_id', 'phone_id', contact_id, phone_id)

        emails_raw = row.get('Email', '').strip()
        for email in emails_raw.split(','):
            norm_email = normalize_email(email)
            if norm_email:
                email_id = insert_or_get_id('emails', 'email', norm_email, email_cache)
                link_m2m('contact_emails', 'contact_id', 'email_id', contact_id, email_id)

        websites_raw = row.get('Сайт', '').strip()
        for site in websites_raw.split(','):
            site = site.strip()
            if site:
                cursor.execute("SELECT id FROM company_websites WHERE url = %s AND company_id = %s", (site, company_id))
                if not cursor.fetchone():
                    cursor.execute("INSERT INTO company_websites (company_id, url) VALUES (%s, %s)", (company_id, site))

        if i % 200 == 0:
            print_progress("3/3", i, total, start3)
            conn.commit()

    except Exception as e:
        print(f"[3/3] Ошибка в строке {i}: {e}")
        conn.rollback()

conn.commit()
print("Импорт завершен!")
