24 lines
595 B
Python
24 lines
595 B
Python
import pysrt
|
|
import pandas as pd
|
|
import chardet
|
|
from pathlib import Path
|
|
|
|
|
|
def parse_srt_to_df(filepath: str) -> pd.DataFrame:
    """Load an SRT subtitle file into a DataFrame of its non-empty cues.

    The file's encoding is guessed with ``chardet`` from the first 4096
    bytes and the file is then parsed with ``pysrt``. Line breaks inside a
    cue are replaced with spaces, surrounding whitespace is stripped, and
    cues whose text ends up empty are skipped.

    Args:
        filepath: Path to the ``.srt`` file.

    Returns:
        A DataFrame with the columns ``start``, ``end`` and ``text`` (in
        that order), one row per non-empty cue. ``start`` and ``end`` hold
        the values returned by pysrt's ``to_time()``. The three columns are
        present even when no cue survives the filtering.
    """
    print(f"Загружаю {filepath}")

    # Read just the prefix needed for sniffing instead of loading the whole
    # file into memory and slicing it afterwards.
    with Path(filepath).open("rb") as handle:
        sample = handle.read(4096)

    enc = chardet.detect(sample)["encoding"]
    # A pure-ASCII prefix says nothing about the rest of the file. UTF-8
    # decodes ASCII identically, so widening the guess cannot change the
    # result for real ASCII files but avoids a decode failure when
    # non-ASCII text first appears after the sampled prefix.
    if enc is not None and enc.lower() == "ascii":
        enc = "utf-8"

    # NOTE(review): when chardet cannot decide, ``enc`` is None and is passed
    # through unchanged, leaving the choice to pysrt as before.
    subs = pysrt.open(filepath, encoding=enc)

    rows = []
    for sub in subs:
        text = sub.text.replace('\n', ' ').strip()
        if not text:
            continue
        rows.append({
            "start": sub.start.to_time(),
            "end": sub.end.to_time(),
            "text": text,
        })

    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(rows, columns=["start", "end", "text"])
|