import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parents[1] / "src"))
sys.path.append(str(Path(__file__).resolve().parents[1]))
import h5py
import pandas as pd
import numpy as np

# Submission file to inspect, resolved relative to the current working directory.
file_path = './submissions/prediction.h5'

print(f"Checking {file_path}")

try:
    with h5py.File(file_path, 'r') as h5_file:
        print(f"Keys: {list(h5_file)}")

        for key in h5_file:
            print(f"\nDataset: {key}")
            node = h5_file[key]

            # The raw HDF5 node is not inspected directly on the happy path:
            # pandas/PyTables spreads a frame over several internal datasets
            # (e.g. 'block0_values'), so the entry is loaded back through
            # pandas to report the shape a consumer would actually see.
            try:
                frame = pd.read_hdf(file_path, key=key)
                print(f"  Shape (as DataFrame): {frame.shape}")
                print(f"  Rows (cell types): {frame.index.tolist()}")
                print(f"  Columns (samples): {len(frame.columns)}")
                print(f"  First 5 columns: {frame.columns[:5].tolist()}")

                # Heuristic orientation check: more rows than columns, and
                # more than 10 rows, suggests the frame was written transposed.
                row_count = frame.shape[0]
                col_count = frame.shape[1]
                if row_count > max(col_count, 10):
                    print("  WARNING: Looks transposed! Rows should be cell types (small number)")
            except Exception as read_err:
                print(f"  Error reading with pandas: {read_err}")
                # pandas could not interpret this entry; list the child names
                # when the node exposes them, otherwise label it a dataset.
                if hasattr(node, 'keys'):
                    structure = list(node.keys())
                else:
                    structure = 'Dataset'
                print(f"  HDF5 structure: {structure}")

except Exception as open_err:
    # Any failure not handled per key above (including failing to open the
    # file at all) is reported here instead of raising a traceback.
    print(f"Error opening file: {open_err}")
