Example
File Transcription Job with all Features Enabled
- API
- Python SDK
Request
# Create a file transcription job: the audio file and the JSON job config are sent as multipart form fields.
# The URL spells out https:// so the API key in the Authorization header is never sent over plain HTTP.
curl --location 'https://voice.neuralspace.ai/api/v1/jobs' \
--header 'Authorization: sk_ccc189928681a411e7a538201bc650c50167eb2363...' \
--form 'files=@"u71yZXMfE/test_audio_en.m4a"' \
--form 'config="{\"file_transcription\": {\"language_id\": \"en\", \"mode\": \"advanced\", \"number_formatting\": \"words\"}, \"speaker_diarization\": {}, \"translation\": {\"source_language\": \"en\", \"target_languages\": [\"hi\", \"ar\"]}, \"summarize\": true, \"sentiment_detect\": true}"'
Response
{
"success": true,
"message": "Job created successfully",
"data": {
"jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1"
}
}
Script
import requests
import neuralspace as ns

filename = 'english_audio_sample.mp3'

# Download the sample audio file
print('Downloading sample audio file...')
resp = requests.get('https://github.com/Neural-Space/neuralspace-examples/raw/main/datasets/transcription/en/english_audio_sample.mp3')
# Fail on an HTTP error instead of saving an error page as the audio file
resp.raise_for_status()
with open(filename, 'wb') as fp:
    fp.write(resp.content)

vai = ns.VoiceAI()
# or,
# vai = ns.VoiceAI(api_key='YOUR_API_KEY')

# Setup job configuration
# Every feature is a top-level key of the same dict, next to 'file_transcription'.
config = {
    'file_transcription': {
        'language_id': 'en',
        'mode': 'advanced',
    },
    'speaker_diarization': {},
    'summarize': True,
    'sentiment_detect': True,
    'translation': {
        'target_languages': [
            'hi',
            'ar'
        ]
    }
}

# Create a new file transcription job
job_id = vai.transcribe(file=filename, config=config)
print(f'Created job: {job_id}')

# Check the job's status
result = vai.get_job_status(job_id)
print(f'Current status:\n{result}')

# This should finish in a minute for the sample audio used here.
# It will depend on the duration of the audio file and other config options.
print('Waiting for completion...')
result = vai.poll_until_complete(job_id)
print(result)
Output
Downloading sample audio file...
Created job: 6abe4f35-8220-4981-95c7-3b040d9b86d1
Current status:
{
"success": true,
"message": "Data fetched successfully",
"data": {
"timestamp": 1695210581508,
"filename": "test_audio_en.m4a",
"jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1",
"params": {
"file_transcription": {
"language_id": "en",
"mode": "advanced"
}
},
"status": "Queued",
"audioDuration": 131.568,
"messsage": "",
"progress": [
"Queued"
]
}
}
Waiting for completion...
{
"success": true,
"message": "Data fetched successfully",
"data": {
"timestamp": 1695891454965,
"filename": "test_audio_en.m4a",
"jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1",
"params": {
"file_transcription": {
"language_id": "en",
"mode": "advanced",
"number_formatting": "words"
},
"translation": {
"source_language": "en",
"target_languages": [
"hi",
"ar"
]
},
"summarize": true,
"sentiment_detect": true
},
"status": "Completed",
"audioDuration": 3.232,
"messsage": "",
"progress": [
"Queued",
"Started",
"Transcription Started",
"Speaker Diarization Started",
"Speaker Diarization Completed",
"Transcription Completed",
"Subtitles Started",
"Subtitles Completed",
"Sentiment Detection Started",
"Summarization Started",
"Translation Started",
"Translation Completed",
"Sentiment Detection Completed",
"Summarization Completed",
"Completed"
],
"result": {
"transcription": {
"transcript": "This is the test audio file.",
"timestamps": [
{
"word": "This",
"start": 0.8,
"end": 1.12,
"conf": 0.82
},
{
"word": "is",
"start": 1.12,
"end": 1.36,
"conf": 0.82
},
{
"word": "the",
"start": 1.36,
"end": 1.5,
"conf": 0.91
},
{
"word": "test",
"start": 1.5,
"end": 1.8,
"conf": 0.99
},
{
"word": "audio",
"start": 1.8,
"end": 2.1,
"conf": 0.7
},
{
"word": "file.",
"start": 2.1,
"end": 2.76,
"conf": 0.99
}
],
"segments": [
{
"startTime": 0.8184375,
"endTime": 2.6746875,
"text": "This is the test audio file.",
"speaker": "Speaker 0",
"sentiment": "neutral"
}
],
"summary": "Summary: The audio file is a test recording.\n\nActionable Items: None\n\nInsights: None"
},
"translation": {
"hi": {
"text": "यह परीक्षण ऑडियो फ़ाइल है.",
"segments": [
{
"startTime": 0.8184375,
"endTime": 2.6746875,
"text": "यह परीक्षण ऑडियो फ़ाइल है.",
"speaker": "Speaker 0"
}
],
"summary": "सारांश: ऑडियो फ़ाइल एक परीक्षण रिकॉर्डिंग है।\n\nकार्रवाई योग्य वस्तुएँ: कोई नहीं\n\nअंतर्दृष्टि: कोई नहीं"
},
"ar": {
"text": "هذا هو ملف الصوت التجريبي.",
"segments": [
{
"startTime": 0.8184375,
"endTime": 2.6746875,
"text": "هذا هو ملف الصوت التجريبي.",
"speaker": "Speaker 0"
}
],
"summary": "ملخص: الملف الصوتي هو تسجيل اختباري.\n\nالعناصر القابلة للتنفيذ: لا يوجد\n\nرؤى: لا شيء"
}
},
"sentiment_detection": {
"overall": "neutral"
}
}
}
}
File Transcription Full Result
- API
- Python SDK
Request
# Fetch the job identified by the ID at the end of the URL path.
# The URL spells out https:// so the API key in the Authorization header is never sent over plain HTTP.
curl --location 'https://voice.neuralspace.ai/api/v1/jobs/6abe4f35-8220-4981-95c7-3b040d9b86d1' \
--header 'Authorization: sk_ccc189928681a411e7a538201bc650c50167eb2363...'
Response
{
"success": true,
"message": "Data fetched successfully",
"data": {
"timestamp": 1695891454965,
"filename": "test_audio_en.m4a",
"jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1",
"params": {
"file_transcription": {
"language_id": "en",
"mode": "advanced",
"number_formatting": "words"
},
"translation": {
"source_language": "en",
"target_languages": [
"hi",
"ar"
]
},
"summarize": true,
"sentiment_detect": true
},
"status": "Completed",
"audioDuration": 3.232,
"messsage": "",
"progress": [
"Queued",
"Started",
"Transcription Started",
"Speaker Diarization Started",
"Speaker Diarization Completed",
"Transcription Completed",
"Subtitles Started",
"Subtitles Completed",
"Sentiment Detection Started",
"Summarization Started",
"Translation Started",
"Translation Completed",
"Sentiment Detection Completed",
"Summarization Completed",
"Completed"
],
"result": {
"transcription": {
"transcript": "This is the test audio file.",
"timestamps": [
{
"word": "This",
"start": 0.8,
"end": 1.12,
"conf": 0.82
},
{
"word": "is",
"start": 1.12,
"end": 1.36,
"conf": 0.82
},
{
"word": "the",
"start": 1.36,
"end": 1.5,
"conf": 0.91
},
{
"word": "test",
"start": 1.5,
"end": 1.8,
"conf": 0.99
},
{
"word": "audio",
"start": 1.8,
"end": 2.1,
"conf": 0.7
},
{
"word": "file.",
"start": 2.1,
"end": 2.76,
"conf": 0.99
}
],
"segments": [
{
"startTime": 0.8184375,
"endTime": 2.6746875,
"text": "This is the test audio file.",
"speaker": "Speaker 0",
"sentiment": "neutral"
}
],
"summary": "Summary: The audio file is a test recording.\n\nActionable Items: None\n\nInsights: None"
},
"translation": {
"hi": {
"text": "यह परीक्षण ऑडियो फ़ाइल है.",
"segments": [
{
"startTime": 0.8184375,
"endTime": 2.6746875,
"text": "यह परीक्षण ऑडियो फ़ाइल है.",
"speaker": "Speaker 0"
}
],
"summary": "सारांश: ऑडियो फ़ाइल एक परीक्षण रिकॉर्डिंग है।\n\nकार्रवाई योग्य वस्तुएँ: कोई नहीं\n\nअंतर्दृष्टि: कोई नहीं"
},
"ar": {
"text": "هذا هو ملف الصوت التجريبي.",
"segments": [
{
"startTime": 0.8184375,
"endTime": 2.6746875,
"text": "هذا هو ملف الصوت التجريبي.",
"speaker": "Speaker 0"
}
],
"summary": "ملخص: الملف الصوتي هو تسجيل اختباري.\n\nالعناصر القابلة للتنفيذ: لا يوجد\n\nرؤى: لا شيء"
}
},
"sentiment_detection": {
"overall": "neutral"
}
}
}
}
Script
import requests
import neuralspace as ns

# ID of the job whose result should be fetched
job_id = "6abe4f35-8220-4981-95c7-3b040d9b86d1"

# Build the VoiceAI client
vai = ns.VoiceAI()
# Alternatively, pass the API key explicitly:
# vai = ns.VoiceAI(api_key='YOUR_API_KEY')

# Look up the job and print whatever the SDK returns for it
print(vai.get_job_status(job_id))
Output
{
"success": true,
"message": "Data fetched successfully",
"data": {
"timestamp": 1695891454965,
"filename": "test_audio_en.m4a",
"jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1",
"params": {
"file_transcription": {
"language_id": "en",
"mode": "advanced",
"number_formatting": "words"
},
"translation": {
"source_language": "en",
"target_languages": [
"hi",
"ar"
]
},
"summarize": true,
"sentiment_detect": true
},
"status": "Completed",
"audioDuration": 3.232,
"messsage": "",
"progress": [
"Queued",
"Started",
"Transcription Started",
"Speaker Diarization Started",
"Speaker Diarization Completed",
"Transcription Completed",
"Subtitles Started",
"Subtitles Completed",
"Sentiment Detection Started",
"Summarization Started",
"Translation Started",
"Translation Completed",
"Sentiment Detection Completed",
"Summarization Completed",
"Completed"
],
"result": {
"transcription": {
"transcript": "This is the test audio file.",
"timestamps": [
{
"word": "This",
"start": 0.8,
"end": 1.12,
"conf": 0.82
},
{
"word": "is",
"start": 1.12,
"end": 1.36,
"conf": 0.82
},
{
"word": "the",
"start": 1.36,
"end": 1.5,
"conf": 0.91
},
{
"word": "test",
"start": 1.5,
"end": 1.8,
"conf": 0.99
},
{
"word": "audio",
"start": 1.8,
"end": 2.1,
"conf": 0.7
},
{
"word": "file.",
"start": 2.1,
"end": 2.76,
"conf": 0.99
}
],
"segments": [
{
"startTime": 0.8184375,
"endTime": 2.6746875,
"text": "This is the test audio file.",
"speaker": "Speaker 0",
"sentiment": "neutral"
}
],
"summary": "Summary: The audio file is a test recording.\n\nActionable Items: None\n\nInsights: None"
},
"translation": {
"hi": {
"text": "यह परीक्षण ऑडियो फ़ाइल है.",
"segments": [
{
"startTime": 0.8184375,
"endTime": 2.6746875,
"text": "यह परीक्षण ऑडियो फ़ाइल है.",
"speaker": "Speaker 0"
}
],
"summary": "सारांश: ऑडियो फ़ाइल एक परीक्षण रिकॉर्डिंग है।\n\nकार्रवाई योग्य वस्तुएँ: कोई नहीं\n\nअंतर्दृष्टि: कोई नहीं"
},
"ar": {
"text": "هذا هو ملف الصوت التجريبي.",
"segments": [
{
"startTime": 0.8184375,
"endTime": 2.6746875,
"text": "هذا هو ملف الصوت التجريبي.",
"speaker": "Speaker 0"
}
],
"summary": "ملخص: الملف الصوتي هو تسجيل اختباري.\n\nالعناصر القابلة للتنفيذ: لا يوجد\n\nرؤى: لا شيء"
}
},
"sentiment_detection": {
"overall": "neutral"
}
}
}
}