import React, { useState, useEffect } from 'react';
import {
  Typography, Box, TextField, InputAdornment, List, ListItem,
  ListItemText, Chip, Divider, Grid, Card, Table, TableBody,
  TableCell, TableContainer, TableHead, TableRow, Checkbox,
  Select, MenuItem, FormControl, InputLabel, Tabs, Tab, Stepper, Step, StepLabel, Button, Paper, CardContent, LinearProgress,
  useTheme
} from '@mui/material';
import SearchIcon from '@mui/icons-material/Search';
import TabContext from '@mui/lab/TabContext';
import TabList from '@mui/lab/TabList';
import TabPanel from '@mui/lab/TabPanel';
import { 
  LineChart, Line, XAxis, YAxis, CartesianGrid, Tooltip, Legend, 
  BarChart, Bar, ResponsiveContainer
} from 'recharts';

/**
 * Sample benchmark runs listed by BenchmarkHistory and charted by BenchmarkResults.
 *
 * Two entry shapes are present, distinguished by `type`:
 * - "Hardware Performance": has `hardware` (list of device names) and
 *   `metrics.latency` / `metrics.throughput`, each keyed by hardware name and
 *   then by model id. BenchmarkResults labels these charts "ms/token" and
 *   "tokens/second" respectively.
 * - "Dataset Performance": has `dataset` and `metrics.history`, an array of
 *   per-step points keyed by model id with `accuracy` and `perplexity`.
 */
const benchmarkHistory = [
  {
    id: 4,
    name: "Hardware Performance - AMD vs NVIDIA Comparison",
    type: "Hardware Performance",
    date: "2024-02-01",
    models: [
      "meta-llama/Llama-3.2-3B-Instruct",
      "meta-llama/Llama-3.1-8B",
      "inceptionai/jais-adapted-7b-chat"
    ],
    hardware: ["AMD MI250", "AMD MI210", "NVIDIA GH200"],
    metrics: {
      latency: {
        "AMD MI250": {
          "meta-llama/Llama-3.2-3B-Instruct": 32,
          "meta-llama/Llama-3.1-8B": 48,
          "inceptionai/jais-adapted-7b-chat": 45
        },
        "AMD MI210": {
          "meta-llama/Llama-3.2-3B-Instruct": 38,
          "meta-llama/Llama-3.1-8B": 55,
          "inceptionai/jais-adapted-7b-chat": 52
        },
        "NVIDIA GH200": {
          "meta-llama/Llama-3.2-3B-Instruct": 28,
          "meta-llama/Llama-3.1-8B": 42,
          "inceptionai/jais-adapted-7b-chat": 40
        }
      },
      throughput: {
        "AMD MI250": {
          "meta-llama/Llama-3.2-3B-Instruct": 220,
          "meta-llama/Llama-3.1-8B": 180,
          "inceptionai/jais-adapted-7b-chat": 190
        },
        "AMD MI210": {
          "meta-llama/Llama-3.2-3B-Instruct": 190,
          "meta-llama/Llama-3.1-8B": 160,
          "inceptionai/jais-adapted-7b-chat": 170
        },
        "NVIDIA GH200": {
          "meta-llama/Llama-3.2-3B-Instruct": 250,
          "meta-llama/Llama-3.1-8B": 210,
          "inceptionai/jais-adapted-7b-chat": 220
        }
      }
    }
  },
  {
    id: 5,
    name: "Dataset Performance - Language Model Comparison",
    type: "Dataset Performance",
    date: "2024-02-02",
    models: [
      "meta-llama/Llama-3.2-3B-Instruct",
      "meta-llama/Llama-3.1-8B",
      "inceptionai/jais-adapted-7b-chat"
    ],
    dataset: "mlabonne/guanaco-llama2-1k",
    metrics: {
      // 50 synthetic points generated once at module load. Math.random() adds
      // jitter, so the exact values differ between page loads. Accuracy trends
      // upward and perplexity downward as the step index grows.
      history: Array.from({ length: 50 }, (_, i) => ({
        step: i,
        "meta-llama/Llama-3.2-3B-Instruct": {
          accuracy: 0.82 + (Math.random() * 0.02) + (i * 0.002),
          perplexity: 12 - (Math.random() * 0.5) - (i * 0.1),
        },
        "meta-llama/Llama-3.1-8B": {
          accuracy: 0.85 + (Math.random() * 0.02) + (i * 0.001),
          perplexity: 10 - (Math.random() * 0.5) - (i * 0.08),
        },
        "inceptionai/jais-adapted-7b-chat": {
          accuracy: 0.84 + (Math.random() * 0.02) + (i * 0.0015),
          perplexity: 11 - (Math.random() * 0.5) - (i * 0.09),
        }
      }))
    }
  }
];

/**
 * Sample list of the user's fine-tuned models. Every field except `id` is a
 * display string.
 *
 * Note: components in this file that declare a `myModels` prop shadow this
 * constant inside their own bodies. The `id` values here (1-3) also overlap
 * with the ids used in `publicModels`.
 */
const myModels = [
  { 
    id: 1, 
    name: "Mistral-7B-Instruct Custom", 
    description: "Fine-tuned instruction model", 
    version: "v1.0.1", 
    accuracy: "92%", 
    trainingData: "1M+ samples", 
    updatedAt: "2 days ago", 
    dataset: "Customer Support Conversations",
    parameters: "7B",
    modelSize: "14GB",
    tokenSpeed: "180 tokens/s",
    maxContext: "8K tokens",
    baseModel: "mistralai/Mistral-7B-Instruct-v0.3"
  },
  { 
    id: 2, 
    name: "CodeLlama Python Custom", 
    description: "Fine-tuned Python code generation model", 
    version: "v1.1.0", 
    accuracy: "93%", 
    trainingData: "800K samples", 
    updatedAt: "3 days ago", 
    dataset: "Python Code Examples",
    parameters: "7B",
    modelSize: "13GB", 
    tokenSpeed: "200 tokens/s",
    maxContext: "4K tokens",
    baseModel: "codellama/CodeLlama-7b-Python-hf"
  },
  { 
    id: 3, 
    name: "Llama-3.2 Chat Custom", 
    description: "Fine-tuned chat model", 
    version: "v1.0.2", 
    accuracy: "94%", 
    trainingData: "2M+ samples", 
    updatedAt: "1 day ago", 
    dataset: "Conversation Dialogs",
    parameters: "3B",
    modelSize: "6GB",
    tokenSpeed: "250 tokens/s", 
    maxContext: "4K tokens",
    baseModel: "meta-llama/Llama-3.2-3B-Instruct"
  }
];

/**
 * Sample catalogue of public models. `size` and `type` are display strings
 * (ModelCard renders them as chips).
 *
 * Note: components in this file that declare a `publicModels` prop shadow this
 * constant inside their own bodies. The `id` values here (1-10) also overlap
 * with the ids used in `myModels`.
 */
const publicModels = [
  { id: 1, name: "meta-llama/Llama-3.2-3B-Instruct", size: "3B parameters", type: "Instruction-tuned LLM" },
  { id: 2, name: "meta-llama/Llama-3.1-8B", size: "8B parameters", type: "Instruction-tuned LLM" },
  { id: 3, name: "nvidia/Llama3-ChatQA-2-8B", size: "8B parameters", type: "Chat QA Model" },
  { id: 4, name: "NousResearch/Llama-2-7b-chat-hf", size: "7B parameters", type: "Chat Model" },
  { id: 5, name: "codellama/CodeLlama-7b-hf", size: "7B parameters", type: "Code Generation Model" },
  { id: 6, name: "codellama/CodeLlama-7b-Python-hf", size: "7B parameters", type: "Python Code Generation Model" },
  { id: 7, name: "mistralai/Mistral-7B-Instruct-v0.3", size: "7B parameters", type: "Instruction-tuned LLM" },
  { id: 8, name: "mistralai/Mixtral-8x7B-Instruct-v0.1", size: "8x7B parameters", type: "Instruction-tuned LLM" },
  { id: 9, name: "FlagAlpha/Llama2-Chinese-13b-Chat", size: "13B parameters", type: "Chinese Chat Model" },
  { id: 10, name: "inceptionai/jais-13b", size: "13B parameters", type: "General Purpose Model" }
];

/**
 * Sample datasets offered by DatasetSelection and looked up by id in
 * BenchmarkExecution.
 *
 * `samples` is numeric (formatted with toLocaleString when rendered); `size`
 * and `updated` are display strings. `taskType` may be an empty string, in
 * which case DatasetSelection omits that chip.
 */
const datasets = [
  { id: 1, name: "mlabonne/guanaco-llama2-1k", taskType: "", type: "General", size: "1K", samples: 1000, description: "A subset of the OpenAssistant-Guanaco dataset, formatted to match Llama 2's prompt style.", updated: "2 days ago" },
  { id: 2, name: "bitext/Bitext-customer-support-llm-chatbot-training-dataset", taskType: "QA", type: "QA", size: "10K-100K", samples: 26872, description: "Contains 26,872 question-answer pairs across 27 intents in customer service, totaling approximately 3.57 million tokens.", updated: "1 week ago" },
  { id: 3, name: "b-mc2/sql-create-context", taskType: "QA", type: "QA", size: "1K-10K", samples: 5000, description: "A SQL-focused dataset for generating context in QA tasks.", updated: "3 days ago" },
  { id: 4, name: "google/xtreme", taskType: "QA", type: "QA", size: "100K+", samples: 100000, description: "A benchmark for evaluating cross-lingual generalization across 40 languages and 9 tasks.", updated: "1 month ago" },
  { id: 5, name: "mandarjoshi/trivia_qa", taskType: "QA", type: "QA", size: "100K+", samples: 650000, description: "A reading comprehension dataset containing over 650K question-answer pairs.", updated: "2 weeks ago" },
  { id: 6, name: "riotu-lab/ArabicQA_2.1M", taskType: "QA", type: "QA", size: "1M+", samples: 2100000, description: "An Arabic question-answering dataset with 2.1 million samples.", updated: "5 days ago" },
  { id: 7, name: "llamafactory/alpaca_gpt4_en", taskType: "QA", type: "QA", size: "50K", samples: 50000, description: "A dataset tailored for Alpaca and GPT-4 QA tasks.", updated: "1 day ago" },
  { id: 8, name: "Quardo/gpt-4o-qa", taskType: "QA", type: "QA", size: "10K", samples: 10000, description: "A GPT-4 fine-tuning dataset for QA tasks.", updated: "4 days ago" },
  { id: 9, name: "LimYeri/LeetCode_Python_Solutions_v2", taskType: "QA", type: "CODE", size: "5K", samples: 5000, description: "A dataset containing Python solutions to LeetCode problems.", updated: "1 week ago" }
];

/**
 * Sample hardware targets offered by HardwareSelection and looked up by id in
 * BenchmarkExecution. All fields are display strings.
 *
 * HardwareSelection colours the availability chip: 'High' -> success,
 * 'Medium' -> warning, anything else -> error.
 */
const hardwareOptions = [
  {
    id: 't4',
    name: 'NVIDIA T4',
    specs: '16GB VRAM, 8.1 TFLOPS',
    cost: '$0.35/hour',
    availability: 'High'
  },
  {
    id: 'a100',
    name: 'NVIDIA A100',
    specs: '80GB VRAM, 19.5 TFLOPS',
    cost: '$3.00/hour',
    availability: 'Medium'
  },
  {
    id: 'h100',
    name: 'NVIDIA H100',
    specs: '80GB VRAM, 26.2 TFLOPS',
    cost: '$5.00/hour',
    availability: 'Limited'
  }
];

export default function Benchmark({ myModels, publicModels }) {
  const [view, setView] = useState('new'); // 'new' or 'history'
  const [selectedModels, setSelectedModels] = useState([]);
  const [comparisonType, setComparisonType] = useState('dataset');
  const [selectedDataset, setSelectedDataset] = useState(null);
  const [selectedHardware, setSelectedHardware] = useState(null);

  return (
    <Box sx={{ display: 'flex', flexDirection: 'column', gap: 3 }}>
      {/* Top Navigation */}
      <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
        <Tabs value={view} onChange={(e, newValue) => setView(newValue)}>
          <Tab label="New Benchmark" value="new" />
          <Tab label="Benchmark History" value="history" />
        </Tabs>
      </Box>

      {view === 'new' ? (
        <NewBenchmark 
          myModels={myModels}
          publicModels={publicModels}
          selectedModels={selectedModels}
          setSelectedModels={setSelectedModels}
          comparisonType={comparisonType}
          setComparisonType={setComparisonType}
          selectedDataset={selectedDataset}
          setSelectedDataset={setSelectedDataset}
          selectedHardware={selectedHardware}
          setSelectedHardware={setSelectedHardware}
        />
      ) : (
        <BenchmarkHistory />
      )}
    </Box>
  );
}

function NewBenchmark({ 
  myModels, 
  publicModels, 
  selectedModels,
  setSelectedModels,
  comparisonType,
  setComparisonType,
  selectedDataset,
  setSelectedDataset,
  selectedHardware,
  setSelectedHardware 
}) {
  const [step, setStep] = useState(0);
  
  const steps = [
    {
      label: 'Select Models',
      component: (
        <ModelSelection 
          myModels={myModels}
          publicModels={publicModels}
          selectedModels={selectedModels}
          setSelectedModels={setSelectedModels}
        />
      )
    },
    {
      label: 'Choose Comparison',
      component: (
        <ComparisonSelection 
          comparisonType={comparisonType}
          setComparisonType={setComparisonType}
          selectedDataset={selectedDataset}
          setSelectedDataset={setSelectedDataset}
          selectedHardware={selectedHardware}
          setSelectedHardware={setSelectedHardware}
        />
      )
    },
    {
      label: 'Run Benchmark',
      component: (
        <BenchmarkExecution 
          selectedModels={selectedModels}
          comparisonType={comparisonType}
          selectedDataset={selectedDataset}
          selectedHardware={selectedHardware}
        />
      )
    }
  ];

  return (
    <Box>
      {/* Stepper */}
      <Stepper activeStep={step}>
        {steps.map((s, index) => (
          <Step key={index}>
            <StepLabel>{s.label}</StepLabel>
          </Step>
        ))}
      </Stepper>

      {/* Step Content */}
      <Box sx={{ mt: 4 }}>
        {steps[step].component}
      </Box>

      {/* Navigation */}
      <Box sx={{ mt: 3, display: 'flex', justifyContent: 'space-between' }}>
        <Button 
          disabled={step === 0}
          onClick={() => setStep(prev => prev - 1)}
        >
          Back
        </Button>
        <Button 
          variant="contained"
          disabled={!canProceed(step, selectedModels, comparisonType, selectedDataset, selectedHardware)}
          onClick={() => step < steps.length - 1 ? setStep(prev => prev + 1) : handleBenchmarkStart()}
        >
          {step === steps.length - 1 ? 'Start Benchmark' : 'Next'}
        </Button>
      </Box>
    </Box>
  );
}
function ModelCard({ model, selected, onSelect }) {
  return (
    <Card 
      sx={{ 
        height: '100%', 
        display: 'flex', 
        flexDirection: 'column',
        border: selected ? 2 : 1,
        borderColor: selected ? 'primary.main' : 'divider',
        cursor: 'pointer'
      }}
      onClick={() => onSelect(model)}
    >
      <CardContent sx={{ flexGrow: 1 }}>
        <Typography variant="h6" component="div" gutterBottom>
          {model.name}
        </Typography>
        
        {/* Basic model information */}
        <Box sx={{ mt: 2, display: 'flex', flexWrap: 'wrap', gap: 1 }}>
          <Chip 
            size="small"
            label={model.size || `${model.parameters} parameters`}
            color="primary"
          />
          {model.type && (
            <Chip 
              size="small"
              label={model.type}
              variant="outlined"
            />
          )}
        </Box>

        {model.description && (
          <Typography variant="body2" color="text.secondary" sx={{ mt: 2 }}>
            {model.description}
          </Typography>
        )}
      </CardContent>
      
      <Box sx={{ p: 2, pt: 0 }}>
        <Checkbox 
          checked={selected}
          onChange={(e) => e.stopPropagation()}
          onClick={(e) => {
            e.stopPropagation();
            onSelect(model);
          }}
        />
      </Box>
    </Card>
  );
}

function BenchmarkExecution({ 
  selectedModels, 
  comparisonType, 
  selectedDataset, 
  selectedHardware 
}) {
  const [isRunning, setIsRunning] = useState(false);
  const [progress, setProgress] = useState(0);

  return (
    <Box sx={{ display: 'flex', flexDirection: 'column', gap: 3 }}>
      {/* Summary of selections */}
      <Paper sx={{ p: 3 }}>
        <Typography variant="h6" gutterBottom>Benchmark Summary</Typography>
        <Grid container spacing={2}>
          <Grid item xs={12}>
            <Typography variant="subtitle1">Selected Models:</Typography>
            <Box sx={{ display: 'flex', gap: 1, mt: 1 }}>
              {selectedModels.map(model => (
                <Chip key={model.id} label={model.name} />
              ))}
            </Box>
          </Grid>
          <Grid item xs={12}>
            <Typography variant="subtitle1">Comparison Type:</Typography>
            <Typography>{comparisonType === 'dataset' ? 'Dataset Performance' : 'Hardware Performance'}</Typography>
          </Grid>
          {comparisonType === 'dataset' ? (
            <Grid item xs={12}>
              <Typography variant="subtitle1">Selected Dataset:</Typography>
              <Typography>{datasets.find(d => d.id === selectedDataset)?.name}</Typography>
            </Grid>
          ) : (
            <Grid item xs={12}>
              <Typography variant="subtitle1">Selected Hardware:</Typography>
              <Typography>{hardwareOptions.find(h => h.id === selectedHardware)?.name}</Typography>
            </Grid>
          )}
        </Grid>
      </Paper>

      {/* Progress */}
      {isRunning && (
        <Box sx={{ width: '100%' }}>
          <LinearProgress variant="determinate" value={progress} />
          <Typography variant="body2" color="text.secondary" align="center" sx={{ mt: 1 }}>
            {progress}% Complete
          </Typography>
        </Box>
      )}
    </Box>
  );
}

function ModelSelection({ myModels, publicModels, selectedModels, setSelectedModels }) {
  const [activeCategory, setActiveCategory] = useState('my'); // 'my' or 'public'

  return (
    <Grid container spacing={3}>
      {/* Model Categories */}
      <Grid item xs={3}>
        <List>
          <ListItem 
            button 
            selected={activeCategory === 'my'}
            onClick={() => setActiveCategory('my')}
          >
            <ListItemText primary="My Models" secondary={`${myModels.length} models`} />
          </ListItem>
          <ListItem 
            button 
            selected={activeCategory === 'public'}
            onClick={() => setActiveCategory('public')}
          >
            <ListItemText primary="Public Models" secondary={`${publicModels.length} models`} />
          </ListItem>
        </List>
      </Grid>

      {/* Model Cards */}
      <Grid item xs={9}>
        <Grid container spacing={2}>
          {(activeCategory === 'my' ? myModels : publicModels).map(model => (
            <Grid item xs={4} key={model.id}>
              <ModelCard
                model={model}
                selected={selectedModels.some(m => m.id === model.id)}
                onSelect={(model) => handleModelSelect(model, selectedModels, setSelectedModels)}
              />
            </Grid>
          ))}
        </Grid>
      </Grid>
    </Grid>
  );
}

function ComparisonSelection({ 
  comparisonType, 
  setComparisonType,
  selectedDataset,
  setSelectedDataset,
  selectedHardware,
  setSelectedHardware 
}) {
  return (
    <Box sx={{ display: 'flex', flexDirection: 'column', gap: 3 }}>
      {/* Comparison Type Selection */}
      <Paper sx={{ p: 3 }}>
        <Typography variant="h6" gutterBottom>What would you like to compare?</Typography>
        <Grid container spacing={2}>
          <Grid item xs={6}>
            <Card 
              onClick={() => setComparisonType('dataset')}
              sx={{ 
                p: 2, 
                cursor: 'pointer',
                border: comparisonType === 'dataset' ? 2 : 1,
                borderColor: comparisonType === 'dataset' ? 'primary.main' : 'divider'
              }}
            >
              <Typography variant="subtitle1">Dataset Performance</Typography>
              <Typography variant="body2" color="text.secondary">
                Compare model performance (loss, accuracy) on specific datasets
              </Typography>
            </Card>
          </Grid>
          <Grid item xs={6}>
            <Card 
              onClick={() => setComparisonType('hardware')}
              sx={{ 
                p: 2, 
                cursor: 'pointer',
                border: comparisonType === 'hardware' ? 2 : 1,
                borderColor: comparisonType === 'hardware' ? 'primary.main' : 'divider'
              }}
            >
              <Typography variant="subtitle1">Hardware Performance</Typography>
              <Typography variant="body2" color="text.secondary">
                Compare model latency and throughput on different hardware
              </Typography>
            </Card>
          </Grid>
        </Grid>
      </Paper>

      {/* Dataset or Hardware Selection based on type */}
      {comparisonType === 'dataset' ? (
        <DatasetSelection 
          selectedDataset={selectedDataset}
          setSelectedDataset={setSelectedDataset}
        />
      ) : (
        <HardwareSelection 
          selectedHardware={selectedHardware}
          setSelectedHardware={setSelectedHardware}
        />
      )}
    </Box>
  );
}

function BenchmarkHistory() {
  const [selectedBenchmark, setSelectedBenchmark] = useState(null);
  
  return (
    <Grid container spacing={3}>
      {/* Benchmark List */}
      <Grid item xs={4}>
        <Paper sx={{ p: 2 }}>
          <Typography variant="h6" gutterBottom>Recent Benchmarks</Typography>
          <List>
            {benchmarkHistory.map(benchmark => (
              <ListItem 
                button 
                key={benchmark.id}
                selected={selectedBenchmark?.id === benchmark.id}
                onClick={() => setSelectedBenchmark(benchmark)}
              >
                <ListItemText 
                  primary={benchmark.name}
                  secondary={
                    <>
                      <Typography variant="caption" display="block">
                        {benchmark.date}
                      </Typography>
                      <Typography variant="caption" display="block">
                        {benchmark.models.length} models • {benchmark.type}
                      </Typography>
                    </>
                  }
                />
              </ListItem>
            ))}
          </List>
        </Paper>
      </Grid>

      {/* Benchmark Details */}
      <Grid item xs={8}>
        {selectedBenchmark && (
          <BenchmarkResults benchmark={selectedBenchmark} />
        )}
      </Grid>
    </Grid>
  );
}

function DatasetSelection({ selectedDataset, setSelectedDataset }) {
  return (
    <Grid container spacing={2}>
      {datasets.map(dataset => (
        <Grid item xs={4} key={dataset.id}>
          <Card 
            onClick={() => setSelectedDataset(dataset.id)}
            sx={{ 
              p: 2, 
              cursor: 'pointer',
              border: selectedDataset === dataset.id ? 2 : 1,
              borderColor: selectedDataset === dataset.id ? 'primary.main' : 'divider',
              height: '100%',
              display: 'flex',
              flexDirection: 'column'
            }}
          >
            <Typography variant="subtitle1" noWrap>{dataset.name}</Typography>
            <Typography variant="body2" color="text.secondary" gutterBottom>
              {dataset.description}
            </Typography>
            <Box sx={{ mt: 'auto', display: 'flex', gap: 1, flexWrap: 'wrap', alignItems: 'center' }}>
              {dataset.taskType && (
                <Chip size="small" label={dataset.taskType} />
              )}
              {dataset.type && (
                <Chip size="small" label={dataset.type} variant="outlined" />
              )}
              <Chip size="small" label={`${dataset.samples.toLocaleString()} samples`} color="primary" variant="outlined" />
              <Typography variant="caption" color="text.secondary">
                Updated {dataset.updated}
              </Typography>
            </Box>
          </Card>
        </Grid>
      ))}
    </Grid>
  );
}

function HardwareSelection({ selectedHardware, setSelectedHardware }) {
  return (
    <Grid container spacing={2}>
      {hardwareOptions.map(hw => (
        <Grid item xs={4} key={hw.id}>
          <Card 
            onClick={() => setSelectedHardware(hw.id)}
            sx={{ 
              p: 2, 
              cursor: 'pointer',
              border: selectedHardware === hw.id ? 2 : 1,
              borderColor: selectedHardware === hw.id ? 'primary.main' : 'divider'
            }}
          >
            <Typography variant="subtitle1">{hw.name}</Typography>
            <Typography variant="body2" color="text.secondary" gutterBottom>
              {hw.specs}
            </Typography>
            <Box sx={{ display: 'flex', justifyContent: 'space-between', mt: 1 }}>
              <Chip size="small" label={hw.cost} color="primary" />
              <Chip 
                size="small" 
                label={hw.availability} 
                color={
                  hw.availability === 'High' ? 'success' : 
                  hw.availability === 'Medium' ? 'warning' : 
                  'error'
                }
              />
            </Box>
          </Card>
        </Grid>
      ))}
    </Grid>
  );
}

function BenchmarkResults({ benchmark }) {
  const theme = useTheme();

  // Transform data for hardware performance charts
  const getHardwareChartData = (metricType) => {
    if (benchmark.type !== "Hardware Performance" || !benchmark.hardware) {
      return [];
    }

    return benchmark.models.map(model => ({
      name: model.split('/').pop(),
      ...benchmark.hardware.reduce((acc, hw) => ({
        ...acc,
        [hw]: benchmark.metrics[metricType][hw][model]
      }), {})
    }));
  };

  // Transform data for dataset performance charts
  const getDatasetChartData = (metricType) => {
    if (benchmark.type !== "Dataset Performance" || !benchmark.metrics.history) {
      return [];
    }

    return benchmark.metrics.history.map(point => ({
      step: point.step,
      ...benchmark.models.reduce((acc, model) => ({
        ...acc,
        [model.split('/').pop()]: point[model][metricType]
      }), {})
    }));
  };

  const MetricsChart = ({ data, metrics, title, yAxisLabel, height = 300 }) => (
    <Box sx={{ height: '100%', display: 'flex', flexDirection: 'column' }}>
      <Box sx={{ 
        display: 'flex', 
        justifyContent: 'center',
        mb: 0,
        gap: 1,
        flexWrap: 'wrap'
      }}>
        {metrics.map((metric) => (
          <Box
            key={metric.key}
            sx={{
              display: 'flex',
              alignItems: 'center',
              gap: 1,
              color: metric.color,
              backgroundColor: `${metric.color}15`,
              px: 1,
              py: 0.5,
              borderRadius: 2,
            }}
          >
            <Box sx={{ width: 10, height: 10, borderRadius: '50%', backgroundColor: metric.color }} />
            <Typography variant="body2">
              {metric.name}
            </Typography>
          </Box>
        ))}
      </Box>
      <ResponsiveContainer width="100%" height={height}>
        <LineChart 
          data={data}
          margin={{ top: 10, right: 30, left: 70, bottom: 40 }}
        >
          <CartesianGrid strokeDasharray="3 3" strokeOpacity={0.1} />
          <XAxis
            dataKey="step"
            label={{ 
              value: 'Step', 
              position: 'bottom', 
              offset: 20,
              style: { 
                fill: theme.palette.text.primary,
                fontSize: 14,
                fontWeight: 500
              }
            }}
            tick={{ 
              fill: theme.palette.text.secondary,
              fontSize: 12
            }}
          />
          <YAxis 
            label={{ 
              value: yAxisLabel, 
              angle: -90, 
              position: 'insideLeft',
              offset: -60,
              style: { 
                fill: theme.palette.text.primary,
                fontSize: 14,
                fontWeight: 500
              }
            }}
            tick={{ 
              fill: theme.palette.text.secondary,
              fontSize: 12
            }}
          />
          <Tooltip
            labelFormatter={(value) => `Step ${value}`}
            contentStyle={{
              backgroundColor: theme.palette.background.paper,
              border: `1px solid ${theme.palette.divider}`,
              borderRadius: 8,
            }}
          />
          {metrics.map((metric) => (
            <Line
              key={metric.key}
              type="monotone"
              dataKey={metric.key}
              stroke={metric.color}
              name={metric.name}
              dot={false}
              strokeWidth={2}
            />
          ))}
        </LineChart>
      </ResponsiveContainer>
    </Box>
  );

  return (
    <Box sx={{ display: 'flex', flexDirection: 'column', gap: 3 }}>
      {/* Benchmark Header */}
      <Paper sx={{ p: 2 }}>
        <Typography variant="h6">{benchmark.name}</Typography>
        <Box sx={{ display: 'flex', gap: 2, mt: 1, flexWrap: 'wrap' }}>
          <Chip label={benchmark.type} />
          <Chip label={benchmark.date} variant="outlined" />
          <Chip label={`${benchmark.models.length} models`} variant="outlined" />
          {benchmark.dataset && <Chip label={`Dataset: ${benchmark.dataset}`} variant="outlined" />}
          {benchmark.hardware && benchmark.hardware.map(hw => (
            <Chip key={hw} label={hw} variant="outlined" />
          ))}
        </Box>
      </Paper>

      {/* Charts */}
      <Grid container spacing={3}>
        {benchmark.type === "Dataset Performance" ? (
          <>
            <Grid item xs={12}>
              <Paper sx={{ p: 2 }}>
                <Typography variant="h6" gutterBottom>Accuracy Over Time</Typography>
                <Box sx={{ height: 400 }}>
                  <MetricsChart
                    data={getDatasetChartData('accuracy')}
                    metrics={benchmark.models.map((model, index) => ({
                      key: model.split('/').pop(),
                      name: model.split('/').pop(),
                      color: `hsl(${index * 120}, 70%, 50%)`
                    }))}
                    yAxisLabel="Accuracy"
                    height={350}
                  />
                </Box>
              </Paper>
            </Grid>
            <Grid item xs={12}>
              <Paper sx={{ p: 2 }}>
                <Typography variant="h6" gutterBottom>Perplexity Over Time</Typography>
                <Box sx={{ height: 400 }}>
                  <MetricsChart
                    data={getDatasetChartData('perplexity')}
                    metrics={benchmark.models.map((model, index) => ({
                      key: model.split('/').pop(),
                      name: model.split('/').pop(),
                      color: `hsl(${index * 120}, 70%, 50%)`
                    }))}
                    yAxisLabel="Perplexity"
                    height={350}
                  />
                </Box>
              </Paper>
            </Grid>
          </>
        ) : (
          <>
            <Grid item xs={12}>
              <Paper sx={{ p: 2 }}>
                <Typography variant="subtitle1" gutterBottom>Latency Comparison (ms/token)</Typography>
                <Box sx={{ width: '100%', height: 400 }}>
                  <BarChart
                    width={800}
                    height={350}
                    data={getHardwareChartData('latency')}
                    margin={{ top: 20, right: 30, left: 20, bottom: 5 }}
                  >
                    <CartesianGrid strokeDasharray="3 3" />
                    <XAxis dataKey="name" />
                    <YAxis />
                    <Tooltip />
                    <Legend />
                    {benchmark.hardware.map((hw, index) => (
                      <Bar 
                        key={hw}
                        dataKey={hw}
                        fill={`hsl(${index * 120}, 70%, 50%)`}
                      />
                    ))}
                  </BarChart>
                </Box>
              </Paper>
            </Grid>
            <Grid item xs={12}>
              <Paper sx={{ p: 2 }}>
                <Typography variant="subtitle1" gutterBottom>Throughput Comparison (tokens/second)</Typography>
                <Box sx={{ width: '100%', height: 400 }}>
                  <BarChart
                    width={800}
                    height={350}
                    data={getHardwareChartData('throughput')}
                    margin={{ top: 20, right: 30, left: 20, bottom: 5 }}
                  >
                    <CartesianGrid strokeDasharray="3 3" />
                    <XAxis dataKey="name" />
                    <YAxis />
                    <Tooltip />
                    <Legend />
                    {benchmark.hardware.map((hw, index) => (
                      <Bar 
                        key={hw}
                        dataKey={hw}
                        fill={`hsl(${index * 120}, 70%, 50%)`}
                      />
                    ))}
                  </BarChart>
                </Box>
              </Paper>
            </Grid>
          </>
        )}
      </Grid>
    </Box>
  );
}

// Helper functions
/**
 * Decides whether the wizard's forward button is enabled for a given step.
 *
 * - Step 0 (model selection): at least two models must be selected.
 * - Step 1 (comparison selection): the dataset (for 'dataset' comparisons) or
 *   otherwise the hardware selection must not be `null`.
 * - Step 2 (execution): always allowed.
 * - Any other step: never allowed.
 *
 * @param {number} step - Zero-based wizard step index.
 * @param {Array} selectedModels - Currently selected models.
 * @param {string} comparisonType - 'dataset' or another comparison type.
 * @param {*} selectedDataset - Chosen dataset id, or null if none.
 * @param {*} selectedHardware - Chosen hardware id, or null if none.
 * @returns {boolean} True when the user may move forward.
 */
function canProceed(step, selectedModels, comparisonType, selectedDataset, selectedHardware) {
  if (step === 0) {
    return selectedModels.length >= 2;
  }

  if (step === 1) {
    const requiredChoice = comparisonType === 'dataset' ? selectedDataset : selectedHardware;
    return requiredChoice !== null;
  }

  return step === 2;
}

/**
 * Toggles `model` in the selection: removes it when already selected,
 * appends it otherwise, and passes the new array to `setSelectedModels`.
 *
 * A model is identified by id and name together. The "my" and public model
 * lists both number their ids from 1, so matching on id alone would make a
 * click on one model deselect a different model that shares its id.
 *
 * @param {{id: *, name: string}} model - Model that was clicked.
 * @param {Array<{id: *, name: string}>} selectedModels - Current selection
 *   (not mutated).
 * @param {(models: Array<object>) => void} setSelectedModels - Receives the
 *   next selection array.
 */
function handleModelSelect(model, selectedModels, setSelectedModels) {
  const isSameModel = (candidate) =>
    candidate.id === model.id && candidate.name === model.name;

  if (selectedModels.some(isSameModel)) {
    setSelectedModels(selectedModels.filter((candidate) => !isSameModel(candidate)));
  } else {
    setSelectedModels([...selectedModels, model]);
  }
}

/**
 * Placeholder invoked when the user presses "Start Benchmark" on the last
 * wizard step. It currently only logs a message; a real implementation would
 * typically call an API to start the run and could then show a success
 * message or switch to the history view.
 */
function handleBenchmarkStart() {
  const startMessage = 'Starting benchmark...';
  console.log(startMessage);
}