import re, json

sql_path = r'c:\Users\asenc\Desktop\web eme antigravity\nevaluk_wp280 (2).sql'

print("Reading SQL file...")
with open(sql_path, 'r', encoding='utf-8', errors='replace') as f:
    lines = f.readlines()

print(f"Total lines: {len(lines)}")

# Find the INSERT INTO wpk2_posts lines and extract column names and a sample
products = []
prices = {}
regular_prices = {}
sale_prices = {}
thumbnails = {}
attachments = {}

in_posts_insert = False
in_postmeta_insert = False
current_table = None

for i, line in enumerate(lines):
    # Detect table
    if 'INSERT INTO `wpk2_posts`' in line:
        current_table = 'posts'
    elif 'INSERT INTO `wpk2_postmeta`' in line:
        current_table = 'postmeta'
    elif line.startswith('INSERT INTO'):
        current_table = None
        continue

    if current_table == 'posts':
        # Find value tuples with 'product' or 'attachment' post_type
        if "'product'" in line or "'attachment'" in line:
            # Extract all tuples from this line
            tuples = re.findall(r"\(([^)]+)\)", line)
            for t in tuples:
                # Split by comma but respect quoted strings
                fields = []
                current = ''
                in_quote = False
                for ch in t:
                    if ch == "'" and (not current or current[-1] != '\\'):
                        in_quote = not in_quote
                        current += ch
                    elif ch == ',' and not in_quote:
                        fields.append(current.strip())
                        current = ''
                    else:
                        current += ch
                fields.append(current.strip())
                
                if len(fields) >= 21:
                    post_id = fields[0]
                    post_title = fields[5].strip("'")
                    post_type = fields[20].strip("'")
                    guid = fields[14].strip("'") if len(fields) > 14 else ''
                    post_name = fields[22].strip("'") if len(fields) > 22 else ''
                    post_status = fields[7].strip("'") if len(fields) > 7 else ''
                    
                    if post_type == 'product' and post_status == 'publish':
                        products.append({
                            'id': int(post_id),
                            'title': post_title,
                            'slug': post_name,
                        })
                    elif post_type == 'attachment' and guid:
                        attachments[int(post_id)] = guid
    
    elif current_table == 'postmeta':
        if "'_price'" in line or "'_regular_price'" in line or "'_sale_price'" in line or "'_thumbnail_id'" in line:
            tuples = re.findall(r"\(([^)]+)\)", line)
            for t in tuples:
                fields = []
                current = ''
                in_quote = False
                for ch in t:
                    if ch == "'" and (not current or current[-1] != '\\'):
                        in_quote = not in_quote
                        current += ch
                    elif ch == ',' and not in_quote:
                        fields.append(current.strip())
                        current = ''
                    else:
                        current += ch
                fields.append(current.strip())

                if len(fields) >= 4:
                    meta_id = fields[0]
                    post_id = int(fields[1])
                    meta_key = fields[2].strip("'")
                    meta_value = fields[3].strip("'")
                    
                    if meta_key == '_price' and meta_value:
                        try: prices[post_id] = float(meta_value)
                        except: pass
                    elif meta_key == '_regular_price' and meta_value:
                        try: regular_prices[post_id] = float(meta_value)
                        except: pass
                    elif meta_key == '_sale_price' and meta_value:
                        try: sale_prices[post_id] = float(meta_value)
                        except: pass
                    elif meta_key == '_thumbnail_id' and meta_value:
                        try: thumbnails[post_id] = int(meta_value)
                        except: pass

print(f"Products: {len(products)}")
print(f"Prices: {len(prices)}, Regular: {len(regular_prices)}, Sale: {len(sale_prices)}")
print(f"Thumbnails: {len(thumbnails)}, Attachments: {len(attachments)}")

# Merge
full_products = []
for p in products:
    pid = p['id']
    img_id = thumbnails.get(pid)
    img_url = attachments.get(img_id, '') if img_id else ''
    
    pr = prices.get(pid, 0)
    rp = regular_prices.get(pid, 0)
    sp = sale_prices.get(pid, 0)
    
    full_products.append({
        'id': pid,
        'title': p['title'],
        'slug': p['slug'],
        'price': pr,
        'regular_price': rp,
        'sale_price': sp,
        'image': img_url,
    })

full_products.sort(key=lambda x: x['id'], reverse=True)

output_path = r'c:\Users\asenc\Desktop\web eme antigravity\eme-theme\products-data.json'
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(full_products[:30], f, ensure_ascii=False, indent=2)

print(f"\nSaved {min(len(full_products),30)} products:")
for p in full_products[:20]:
    img_status = "IMG" if p['image'] else "---"
    on_sale = "SALE" if p['sale_price'] else "    "
    print(f"  [{img_status}] [{on_sale}] {p['title'][:50]:50s} ${p['price']:>8.0f}")
