recap_gen/data_loader/dir_loader.py

22 lines
732 B
Python

from pathlib import Path
import pandas as pd
from subtitles.parser import parse_srt_to_df # your existing parser
def load_subtitles_from_dir(folder: str | Path, ext: str = ".srt") -> pd.DataFrame:
"""
Читает каждый файл из заданной папки и возвращает обьединенный pd.DataFrame
"""
folder = Path(folder)
all_frames: list[pd.DataFrame] = []
for file in folder.glob(f"*{ext}"):
df_file = parse_srt_to_df(file)
df_file["source_file"] = file.name
all_frames.append(df_file)
if not all_frames:
raise FileNotFoundError(f"No {ext} files found in {folder.resolve()}")
return pd.concat(all_frames, ignore_index=True)