wyodrebnienie call_id

tills

Jan 8th, 2025

106

Never

Add comment

Not a member of Pastebin yet? Sign Up, it unlocks many cool features!

Python 2.17 KB | None | 0 0

raw download clone embed print report

def extract_call_segments(full_id: str) -> str:
    """Return the first three underscore-separated segments of a call ID.

    Example:
        >>> extract_call_segments(
        ...     "0240828_31657_2958059406I0L412_20240828_084737_123427680"
        ... )
        '0240828_31657_2958059406I0L412'

    Args:
        full_id: The complete call ID string.

    Returns:
        The first three segments joined by underscores. An ID with fewer
        than three segments is returned unchanged.
    """
    # maxsplit=3 stops scanning once the three leading segments are isolated;
    # whatever follows the third underscore lands in a fourth element that
    # the slice discards.
    return "_".join(full_id.split("_", 3)[:3])
def process_transcription(json_content):
    """Convert a JSON transcription into speaker- and time-labelled text.

    Additionally extracts the call ID from the transcription's ``source`` URL.

    Args:
        json_content: The transcription, either as a JSON string or as an
            already-parsed mapping.

    Returns:
        A ``(formatted_text, call_id)`` tuple.

        ``formatted_text`` holds one ``"<speaker> <timestamp> <text>"`` line
        per kept phrase, ordered by ``offsetInTicks`` and joined with
        newlines. ``call_id`` is the first three underscore-separated
        segments of whatever follows ``speechstudiofilename=`` in ``source``,
        or ``""`` when that marker is absent.
    """
    # Function-scope import so the block does not depend on a module-level
    # import being present.
    import json

    data = (
        json.loads(json_content)
        if isinstance(json_content, str)
        else json_content
    )

    # `or ""` also covers a key that is present with a JSON null value, which
    # a plain .get(key, default) would pass through as None.
    source_url = data.get("source") or ""
    call_id = ""
    marker = "speechstudiofilename="
    if marker in source_url:
        # Keep the part of the URL that follows the (last) marker.
        full_id = source_url.split(marker)[-1]
        # Drop the .wav extension if present.
        full_id = full_id.replace(".wav", "")
        # Keep only the first three segments.
        call_id = extract_call_segments(full_id)

    phrases = []
    for phrase in data.get("recognizedPhrases") or []:
        candidates = phrase.get("nBest")
        if not candidates:
            continue
        text = (candidates[0].get("display") or "").strip()
        # NOTE(review): the literal below looks like a spurious subtitle
        # credit emitted by the recognizer -- confirm why it is filtered.
        if not text or text == "Napisy stworzone przez społeczność Amara.org":
            continue

        # The timestamp is computed only for phrases that are kept, so
        # discarded phrases cost no conversion work.
        speaker = "Agent" if phrase.get("channel", 0) == 1 else "Klient"
        offset_ticks = float(phrase.get("offsetInTicks") or 0)
        # convert_ticks_to_timestamp is defined outside this block.
        timestamp = convert_ticks_to_timestamp(offset_ticks)
        phrases.append((offset_ticks, f"{speaker} {timestamp} {text}"))

    # Sort on the offset alone; the sort is stable, so phrases sharing an
    # offset keep their input order.
    phrases.sort(key=lambda item: item[0])
    formatted_text = "\n".join(line for _, line in phrases)
    return formatted_text, call_id

Add Comment

Please, Sign In to add comment