19 lines
457 B
Python
19 lines
457 B
Python
import pysrt
|
|
import pandas as pd
|
|
|
|
|
|
def parse_srt_to_df(filepath: str, encoding: str = "cp1251") -> pd.DataFrame:
    """Load an SRT subtitle file into a DataFrame with one row per cue.

    Line breaks inside a cue's text are replaced by single spaces and the
    result is stripped of surrounding whitespace. Cues whose text is empty
    after that normalization are skipped.

    Args:
        filepath: Path of the subtitle file, passed to ``pysrt.open``.
        encoding: Text encoding passed to ``pysrt.open``.

    Returns:
        A DataFrame with the columns ``start``, ``end`` and ``text``.
        ``start`` and ``end`` hold the values returned by ``to_time()`` on
        the cue's timestamps. The three columns are present even when no
        cue is kept.
    """
    columns = ["start", "end", "text"]
    rows = []

    for sub in pysrt.open(filepath, encoding=encoding):
        # Collapse multi-line cues into a single line of text.
        text = sub.text.replace("\n", " ").strip()
        if not text:
            continue
        rows.append(
            {
                "start": sub.start.to_time(),
                "end": sub.end.to_time(),
                "text": text,
            }
        )

    # Passing the columns explicitly keeps the schema stable for empty input;
    # without it pandas would return a frame with no columns at all.
    return pd.DataFrame(rows, columns=columns)
|