// src/Research.js
import React, { useState } from 'react';
import Typewriter from './Typewriter';


function Research() {
  const [isExpanded, setIsExpanded] = useState({});

  const toggleAbstract = (index) => {
    setIsExpanded((prevState) => ({
      ...prevState,
      [index]: !prevState[index],
    }));
  };

  const papers = [
    {
      title: 'Enhancing Neural Network Interpretability with Feature-Aligned Sparse Autoencoders',
      file: '/papers/Enhancing Neural Network Interpretability with Feature-Aligned Sparse Autoencoders.pdf',
      abstract: `Sparse Autoencoders (SAEs) have shown promise in improving the interpretability of neural network activations, but can learn features that are not features of the input, limiting their effectiveness. We propose MUTUAL FEATURE REGULARIZATION (MFR), a regularization technique for improving feature learning by encouraging SAEs trained in parallel to learn similar features. We motivate MFR by showing
      that features learned by multiple SAEs are more likely to correlate with features
      of the input. By training on synthetic data with known features of the input, we
      show that MFR can help SAEs learn those features, as we can directly compare
      the features learned by the SAE with the input features for the synthetic data.
      We then scale MFR to SAEs that are trained to denoise electroencephalography
      (EEG) data and SAEs that are trained to reconstruct GPT-2 Small activations. We
      show that MFR can improve the reconstruction loss of SAEs by up to 21.21%
      on GPT-2 Small, and 6.67% on EEG data. Our results suggest that the similarity
      between features learned by different SAEs can be leveraged to improve SAE
      training, thereby enhancing performance and the usefulness of SAEs for model
      interpretability`,
     authors: `Luke Marks, Alasdair Paren, David Krueger, Fazl Barez`, 
    },
    {
      title: 'Sparse Autoencoders Reveal Universal Feature Spaces Across Large Language Models',
      file: '/papers/Sparse Autoencoders Reveal Universal Feature Spaces Across Large Language Models.pdf',
      abstract: `We investigate feature universality in large language models (LLMs), a research
      field that aims to understand how different models similarly represent concepts in
      the latent spaces of their intermediate layers. Demonstrating feature universality
      allows discoveries about latent representations to generalize across several models.
      However, comparing features across LLMs is challenging due to polysemanticity,
      in which individual neurons often correspond to multiple features rather than
      distinct ones. This makes it difficult to disentangle and match features across
      different models. To address this issue, we employ a method known as dictionary
      learning by using sparse autoencoders (SAEs) to transform LLM activations into
      more interpretable spaces spanned by neurons corresponding to individual features.
      After matching feature neurons across models via activation correlation, we apply
      representational space similarity metrics like Singular Value Canonical Correlation
      Analysis to analyze these SAE features across different LLMs. Our experiments
      reveal significant similarities in SAE feature spaces across various LLMs, providing
      new evidence for feature universality`,
      authors: 'Michael Lan, Philip Torr, Austin Meek, Ashkan Khakzar, David Krueger, Fazl Barez'
    },
    {
      title: `Towards Interpreting Visual Information Processing in Vision-Language Models`,
      file: '/papers/Towards Interpreting Visual Information Processing in Vision-Language Models.pdf',
      abstract: `Vision-Language Models (VLMs) are powerful tools for processing and understanding text and images. We study the processing of visual tokens in the language model component of LLaVA, a prominent VLM. Our approach focuses on
      analyzing the localization of object information, the evolution of visual token representations across layers, and the mechanism of integrating visual information
      for predictions. Through ablation studies, we demonstrated that object identification accuracy drops by over 70% when object-specific tokens are removed. We
      observed that visual token representations become increasingly interpretable in
      the vocabulary space across layers, suggesting an alignment with textual tokens
      corresponding to image content. Finally, we found that the model extracts object
      information from these refined representations at the last token position for prediction, mirroring the process in text-only language models for factual association
      tasks. These findings provide crucial insights into how VLMs process and integrate visual information, bridging the gap between our understanding of language
      and vision models, and paving the way for more interpretable and controllable
      multimodal systems.`,
      authors: `Clement Neo, Luke Ong, Philip Torr, Mor Geva, David Krueger, Fazl Barez`,
    },
    {
      title: 'PoisonBench: Assessing Large Language Model Vulnerability To Data Poisoning',
      file: '/papers/Poisonbench - Assessing Large Language Model Vulnerability To Data Poisoning.pdf',
      abstract: `Preference learning is a central component for aligning current LLMs, but this
      process can be vulnerable to data poisoning attacks. To address this concern, we
      introduce POISONBENCH, a benchmark for evaluating large language models
      susceptibility to data poisoning during preference learning. Data poisoning attacks can manipulate large language model responses to include hidden malicious
      content or biases, potentially causing the model to generate harmful or unintended
      outputs while appearing to function normally. We deploy two distinct attack types
      across eight realistic scenarios, assessing 21 widely-used models. Our findings reveal concerning trends: (1) Scaling up parameter size does not inherently enhance
      resilience against poisoning attacks; (2) There exists a log-linear relationship between the effects of the attack and the data poison ratio; (3) The effect of data poisoning can generalize to extrapolated triggers that are not included in the poisoned
      data. These results expose weaknesses in current preference learning techniques,
      highlighting the urgent need for more robust defenses against malicious models
      and data manipulation.`,
      authors: 'Tingchen Fu, Mrinank Sharma, Philip Torr, Shay B. Cohen, David Krueger, Fazl Barez'
    },
    {
      title: 'Safeguarding AI in Finance: Lessons for Regulated Industries',
      file: '/papers/Safeguarding AI in Finance.pdf',
      abstract: `Artificial intelligence (AI) is being actively deployed across the financial services
      industry. The responsible adoption of AI in finance requires mitigating novel
      risks related to data privacy, information security, and regulatory compliance. We
      taxonomize these risks with a focus on data leakage, negligent use of AI, reliability
      of AI and training data poisoning. We discuss defensive countermeasures such
      as techniques to improve adversarial robustness, training on encrypted data, and
      evaluations. These countermeasures aim to avoid damages, ensure regulatory
      compliance, and meet consumer expectations. We believe our taxonomy and
      suggested countermesaures are an important step toward responsible adoption of
      AI across financial institutions.`,
      authors: 'Fazl Barez, Luke Marks'
    }

  ];

  return (
    <div className="research-page">
            <h1>
              <Typewriter
               text="Research"
              speed={60}/></h1>
      <ul>
        {papers.map((paper, index) => (
          <li key={index} className="paper-item">
            <a href={paper.file} target="_blank" rel="noopener noreferrer">
              {paper.title}
            </a>
            <p className="authors">{paper.authors}</p>
            <p className="abstract">
              {isExpanded[index] ? paper.abstract : `${paper.abstract.slice(0, 150)}... `}
              <button className="read-more" onClick={() => toggleAbstract(index)}>
                {isExpanded[index] ? 'Read Less' : 'Read More'}
              </button>
            </p>
          </li>
        ))}
      </ul>
    </div>
  );
}

// Expose the Research component as this module's default export.
export default Research;
