Day4-5 (10.28-29, 목-금)
1. 코드 흐름 이해하기
Retriever 데이터 가져오기
def run_sparse_retrieval(
tokenize_fn: Callable[[str], List[str]],
datasets: DatasetDict,
training_args: TrainingArguments,
data_args: DataTrainingArguments,
data_path: str = "../data",
context_path: str = "wikipedia_documents.json",
) -> DatasetDict:

df = retriever.retrieve(datasets["validation"], topk=data_args.top_k_retrieval)

doc_scores, doc_indices = self.get_relevant_doc_bulk(
query_or_dataset["question"], k=topk
)

for i in range(result.shape[0]):
    sorted_result = np.argsort(result[i, :])[::-1]
    doc_scores.append(result[i, :][sorted_result].tolist()[:k])
    doc_indices.append(sorted_result.tolist()[:k])
Last updated