import csv
import pymysql
import re
import time
from datetime import datetime, timedelta

# --- Настройки подключения к БД ---
conn = pymysql.connect(
    host='localhost',
    user='b2b_leads',
    password='v01lEHeGIdJUXSjQ',
    database='b2b_leads',
    charset='utf8mb4'
)
cursor = conn.cursor()

# --- Кэш ---
okved_cache = {}
category_cache = {}
brand_cache = {}
phone_cache = {}
email_cache = {}
company_cache = {}

# --- Вспомогательные функции ---
def normalize_phone(phone):
    digits = re.sub(r'\D', '', phone)
    if digits.startswith('89'):
        digits = '7' + digits[1:]
    elif digits.startswith('9') and len(digits) == 10:
        digits = '7' + digits
    return digits if len(digits) >= 10 else None

def normalize_email(email):
    email = email.strip().lower()
    return email if re.match(r"[^@\s]+@[^@\s]+\.[^@\s]+", email) else None

def excel_date_to_iso(val):
    try:
        base = datetime(1899, 12, 30)
        return (base + timedelta(days=int(val))).date().isoformat()
    except:
        return None

def insert_or_get_id(table, column, value, cache):
    if value in cache:
        return cache[value]
    for _ in range(3):
        try:
            cursor.execute(f"SELECT id FROM {table} WHERE {column} = %s", (value,))
            result = cursor.fetchone()
            if result:
                cache[value] = result[0]
                return result[0]
            cursor.execute(f"INSERT INTO {table} ({column}) VALUES (%s)", (value,))
            cache[value] = cursor.lastrowid
            return cursor.lastrowid
        except pymysql.err.OperationalError as e:
            if e.args[0] == 1213:
                print(f"Deadlock on insert_or_get_id for {table}.{column}='{value}', retrying...")
                time.sleep(0.5)
                continue
            else:
                raise

def link_m2m(link_table, left_id_name, right_id_name, left_id, right_id):
    for _ in range(3):
        try:
            cursor.execute(f"REPLACE INTO {link_table} ({left_id_name}, {right_id_name}) VALUES (%s, %s)", (left_id, right_id))
            return
        except pymysql.err.OperationalError as e:
            if e.args[0] == 1213:
                print(f"Deadlock on linking {link_table}, retrying...")
                time.sleep(0.5)
                continue
            else:
                raise

# --- Загрузка CSV ---
with open('VB2.csv', newline='', encoding='utf-8-sig') as csvfile:
    rows = list(csv.DictReader(csvfile, delimiter=';'))

start_time = time.time()

# --- Этап 1: Компании и ОКВЭДы ---
for i, row in enumerate(rows):
    try:
        name = row['Наименование организации на Wildberries'].strip()
        inn = row['ИНН'].strip()
        if not inn:
            continue

        ogrn = row['ОГРН'].strip()
        ogrn = '{:.0f}'.format(float(ogrn.replace(',', '.'))) if 'E+' in ogrn else ogrn
        reg_date = excel_date_to_iso(row['Дата регистрации ИП']) if row['Дата регистрации ИП'] else None
        region = row['Регион регистрации'].strip()
        activity = row['Вид деятельности/отрасль'].strip()
        activity_code = row['Код основного вида деятельности'].strip()

        cursor.execute("""
            INSERT INTO companies (name, inn, ogrn, registration_date, region)
            VALUES (%s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                name = VALUES(name),
                ogrn = VALUES(ogrn),
                registration_date = VALUES(registration_date),
                region = VALUES(region),
                id=LAST_INSERT_ID(id)
        """, (name, inn, ogrn, reg_date, region))
        company_id = cursor.lastrowid
        company_cache[inn] = company_id

        if activity_code:
            okved_id = insert_or_get_id('okved', 'code', activity_code, okved_cache)
            cursor.execute("UPDATE okved SET description = %s WHERE id = %s", (activity, okved_id))
            link_m2m('company_okveds', 'company_id', 'okved_id', company_id, okved_id)

        if i % 200 == 0:
            elapsed = int(time.time() - start_time)
            remaining = int(elapsed / (i + 1) * (len(rows) - i))
            print(f"[1/3] Этап 1: {i}/{len(rows)} | Осталось ~ {remaining//3600}ч:{(remaining%3600)//60}м:{remaining%60}с")
            conn.commit()
    except Exception as e:
        print(f"[1/3] Ошибка в строке {i}: {e}")
        conn.rollback()
conn.commit()

# --- Этап 2: Контакты, телефоны, email, сайты ---
for i, row in enumerate(rows):
    try:
        inn = row['ИНН'].strip()
        if not inn or inn not in company_cache:
            continue
        company_id = company_cache[inn]
        name = row['Наименование организации на Wildberries'].strip()

        cursor.execute("SELECT id FROM contacts WHERE entity_type = 'company' AND entity_id = %s AND name = %s", (company_id, name))
        contact = cursor.fetchone()
        if contact:
            contact_id = contact[0]
        else:
            cursor.execute("INSERT INTO contacts (entity_type, entity_id, name) VALUES ('company', %s, %s)", (company_id, name))
            contact_id = cursor.lastrowid

        for phone in row['Телефоны'].split(','):
            norm = normalize_phone(phone)
            if norm:
                phone_id = insert_or_get_id('phones', 'number', norm, phone_cache)
                link_m2m('contact_phones', 'contact_id', 'phone_id', contact_id, phone_id)

        for email in row['Email'].split(','):
            norm = normalize_email(email)
            if norm:
                email_id = insert_or_get_id('emails', 'email', norm, email_cache)
                link_m2m('contact_emails', 'contact_id', 'email_id', contact_id, email_id)

        for site in row['Сайт'].split(','):
            site = site.strip()
            if site:
                cursor.execute("SELECT id FROM company_websites WHERE url = %s AND company_id = %s", (site, company_id))
                if not cursor.fetchone():
                    cursor.execute("INSERT INTO company_websites (company_id, url) VALUES (%s, %s)", (company_id, site))

        if i % 200 == 0:
            elapsed = int(time.time() - start_time)
            remaining = int(elapsed / (i + 1) * (len(rows) - i))
            print(f"[2/3] Этап 2: {i}/{len(rows)} | Осталось ~ {remaining//3600}ч:{(remaining%3600)//60}м:{remaining%60}с")
            conn.commit()
    except Exception as e:
        print(f"[2/3] Ошибка в строке {i}: {e}")
        conn.rollback()
conn.commit()

# --- Этап 3: Селлеры, категории, бренды ---
for i, row in enumerate(rows):
    try:
        inn = row['ИНН'].strip()
        if not inn or inn not in company_cache:
            continue
        company_id = company_cache[inn]

        wb_url = row['Страница на сайте Wildberries'].strip()
        reg_on_wb = excel_date_to_iso(row['Дата регистрации на Wildberries']) if row['Дата регистрации на Wildberries'] else None
        rating = row['Рейтинг продавца'].strip() or None
        reviews = row['Количество отзывов'].strip() or None
        sold = row['Проданных товаров'].strip() or None

        cursor.execute("SELECT id FROM marketplace_sellers WHERE platform = 'wb' AND wb_url = %s", (wb_url,))
        seller = cursor.fetchone()

        if seller:
            seller_id = seller[0]
            cursor.execute("""
                UPDATE marketplace_sellers SET
                    company_id = %s,
                    registration_on_wb = IF(%s != '', %s, registration_on_wb),
                    rating = IF(%s IS NOT NULL, %s, rating),
                    reviews_count = IF(%s IS NOT NULL, %s, reviews_count),
                    products_sold = IF(%s IS NOT NULL, %s, products_sold)
                WHERE id = %s
            """, (company_id, reg_on_wb, reg_on_wb, rating, rating, reviews, reviews, sold, sold, seller_id))
        else:
            cursor.execute("""
                INSERT INTO marketplace_sellers (company_id, platform, wb_url, registration_on_wb, rating, reviews_count, products_sold)
                VALUES (%s, 'wb', %s, %s, %s, %s, %s)
            """, (company_id, wb_url, reg_on_wb, rating, reviews, sold))
            seller_id = cursor.lastrowid

        for cat in row['Разделы товаров'].split(';'):
            cat = cat.strip()
            if cat:
                cat_id = insert_or_get_id('product_categories', 'name', cat, category_cache)
                link_m2m('seller_product_categories', 'seller_id', 'category_id', seller_id, cat_id)

        for brand in row['Бренды продавца'].split(';'):
            brand = brand.strip()
            if brand:
                brand_id = insert_or_get_id('brands', 'name', brand, brand_cache)
                link_m2m('seller_brands', 'seller_id', 'brand_id', seller_id, brand_id)

        if i % 200 == 0:
            elapsed = int(time.time() - start_time)
            remaining = int(elapsed / (i + 1) * (len(rows) - i))
            print(f"[3/3] Этап 3: {i}/{len(rows)} | Осталось ~ {remaining//3600}ч:{(remaining%3600)//60}м:{remaining%60}с")
            conn.commit()
    except Exception as e:
        print(f"[3/3] Ошибка в строке {i}: {e}")
        conn.rollback()

conn.commit()
print("Импорт завершен!")
