Example

File Transcription Job with all Features Enabled

API
Python SDK

Request

# Create a file transcription job by POSTing the audio file and its JSON config
# as multipart form fields. The scheme is spelled out so the API key in the
# Authorization header is sent over HTTPS (curl assumes http:// when it is omitted).
curl --location 'https://voice.neuralspace.ai/api/v1/jobs' \
--header 'Authorization: sk_ccc189928681a411e7a538201bc650c50167eb2363...' \
--form 'files=@"u71yZXMfE/test_audio_en.m4a"' \
--form 'config="{\"file_transcription\": {\"language_id\": \"en\", \"mode\": \"advanced\", \"number_formatting\": \"words\"}, \"speaker_diarization\": {}, \"translation\": {\"source_language\": \"en\", \"target_languages\": [\"hi\", \"ar\"]}, \"summarize\": true, \"sentiment_detect\": true}"'

Response

{
    "success": true,
    "message": "Job created successfully",
    "data": {
        "jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1"
    }
}

Script

import requests
import neuralspace as ns

# End-to-end example: download a sample audio file, submit it as a file
# transcription job with the optional features enabled, then print the job's
# status right after creation and again once polling returns.

filename = 'english_audio_sample.mp3'

# Download the sample audio file
print('Downloading sample audio file...')
resp = requests.get('https://github.com/Neural-Space/neuralspace-examples/raw/main/datasets/transcription/en/english_audio_sample.mp3')
# Stop on an HTTP error instead of saving an error page under the audio filename.
resp.raise_for_status()
with open(filename, 'wb') as fp:
    fp.write(resp.content)


vai = ns.VoiceAI()
# or,
# vai = ns.VoiceAI(api_key='YOUR_API_KEY')

# Setup job configuration
config = {
    # Transcription settings for the uploaded file
    'file_transcription': {
        'language_id': 'en',
        'mode': 'advanced',
    },
    # An empty dict is passed for diarization, i.e. no extra options are set here
    'speaker_diarization': {},
    'summarize': True,
    'sentiment_detect': True,
    # Languages to translate the transcript into
    'translation': {
        'target_languages': [
            'hi',
            'ar',
        ],
    },
}

# Create a new file transcription job
job_id = vai.transcribe(file=filename, config=config)
print(f'Created job: {job_id}')

# Check the job's status
result = vai.get_job_status(job_id)
print(f'Current status:\n{result}')

# This should finish in a minute for the sample audio used here.
# It will depend on the duration of the audio file and other config options.
print('Waiting for completion...')
result = vai.poll_until_complete(job_id)
print(result)

Output

Downloading sample audio file...
Created job: 6abe4f35-8220-4981-95c7-3b040d9b86d1
Current status:
{
  "success": True,
  "message": "Data fetched successfully",
  "data": {
    "timestamp": 1695210581508,
    "filename": "test_audio_en.m4a",
    "jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1",
    "params": {
      "file_transcription": {
        "language_id": "en",
        "mode": "advanced"
      }
    },
    "status": "Queued",
    "audioDuration": 131.568,
    "messsage": "",
    "progress": [
      "Queued"
    ]
  }
}
Waiting for completion...
{
    "success": true,
    "message": "Data fetched successfully",
    "data": {
        "timestamp": 1695891454965,
        "filename": "test_audio_en.m4a",
        "jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1",
        "params": {
            "file_transcription": {
                "language_id": "en",
                "mode": "advanced",
                "number_formatting": "words"
            },
            "translation": {
                "source_language": "en",
                "target_languages": [
                    "hi",
                    "ar"
                ]
            },
            "summarize": true,
            "sentiment_detect": true
        },
        "status": "Completed",
        "audioDuration": 3.232,
        "messsage": "",
        "progress": [
            "Queued",
            "Started",
            "Transcription Started",
            "Speaker Diarization Started",
            "Speaker Diarization Completed",
            "Transcription Completed",
            "Subtitles Started",
            "Subtitles Completed",
            "Sentiment Detection Started",
            "Summarization Started",
            "Translation Started",
            "Translation Completed",
            "Sentiment Detection Completed",
            "Summarization Completed",
            "Completed"
        ],
        "result": {
            "transcription": {
                "transcript": "This is the test audio file.",
                "timestamps": [
                    {
                        "word": "This",
                        "start": 0.8,
                        "end": 1.12,
                        "conf": 0.82
                    },
                    {
                        "word": "is",
                        "start": 1.12,
                        "end": 1.36,
                        "conf": 0.82
                    },
                    {
                        "word": "the",
                        "start": 1.36,
                        "end": 1.5,
                        "conf": 0.91
                    },
                    {
                        "word": "test",
                        "start": 1.5,
                        "end": 1.8,
                        "conf": 0.99
                    },
                    {
                        "word": "audio",
                        "start": 1.8,
                        "end": 2.1,
                        "conf": 0.7
                    },
                    {
                        "word": "file.",
                        "start": 2.1,
                        "end": 2.76,
                        "conf": 0.99
                    }
                ],
                "segments": [
                    {
                        "startTime": 0.8184375,
                        "endTime": 2.6746875,
                        "text": "This is the test audio file.",
                        "speaker": "Speaker 0",
                        "sentiment": "neutral"
                    }
                ],
                "summary": "Summary: The audio file is a test recording.\n\nActionable Items: None\n\nInsights: None"
            },
            "translation": {
                "hi": {
                    "text": "यह परीक्षण ऑडियो फ़ाइल है.",
                    "segments": [
                        {
                            "startTime": 0.8184375,
                            "endTime": 2.6746875,
                            "text": "यह परीक्षण ऑडियो फ़ाइल है.",
                            "speaker": "Speaker 0"
                        }
                    ],
                    "summary": "सारांश: ऑडियो फ़ाइल एक परीक्षण रिकॉर्डिंग है।\n\nकार्रवाई योग्य वस्तुएँ: कोई नहीं\n\nअंतर्दृष्टि: कोई नहीं"
                },
                "ar": {
                    "text": "هذا هو ملف الصوت التجريبي.",
                    "segments": [
                        {
                            "startTime": 0.8184375,
                            "endTime": 2.6746875,
                            "text": "هذا هو ملف الصوت التجريبي.",
                            "speaker": "Speaker 0"
                        }
                    ],
                    "summary": "ملخص: الملف الصوتي هو تسجيل اختباري.\n\nالعناصر القابلة للتنفيذ: لا يوجد\n\nرؤى: لا شيء"
                }
            },
            "sentiment_detection": {
                "overall": "neutral"
            }
        }
    }
}

File Transcription Full Result

API
Python SDK

Request

# Fetch the status and full result of a job by its ID. The scheme is spelled
# out so the API key in the Authorization header is sent over HTTPS
# (curl assumes http:// when it is omitted).
curl --location 'https://voice.neuralspace.ai/api/v1/jobs/6abe4f35-8220-4981-95c7-3b040d9b86d1' \
--header 'Authorization: sk_ccc189928681a411e7a538201bc650c50167eb2363...'

Response

{
    "success": true,
    "message": "Data fetched successfully",
    "data": {
        "timestamp": 1695891454965,
        "filename": "test_audio_en.m4a",
        "jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1",
        "params": {
            "file_transcription": {
                "language_id": "en",
                "mode": "advanced",
                "number_formatting": "words"
            },
            "translation": {
                "source_language": "en",
                "target_languages": [
                    "hi",
                    "ar"
                ]
            },
            "summarize": true,
            "sentiment_detect": true
        },
        "status": "Completed",
        "audioDuration": 3.232,
        "messsage": "",
        "progress": [
            "Queued",
            "Started",
            "Transcription Started",
            "Speaker Diarization Started",
            "Speaker Diarization Completed",
            "Transcription Completed",
            "Subtitles Started",
            "Subtitles Completed",
            "Sentiment Detection Started",
            "Summarization Started",
            "Translation Started",
            "Translation Completed",
            "Sentiment Detection Completed",
            "Summarization Completed",
            "Completed"
        ],
        "result": {
            "transcription": {
                "transcript": "This is the test audio file.",
                "timestamps": [
                    {
                        "word": "This",
                        "start": 0.8,
                        "end": 1.12,
                        "conf": 0.82
                    },
                    {
                        "word": "is",
                        "start": 1.12,
                        "end": 1.36,
                        "conf": 0.82
                    },
                    {
                        "word": "the",
                        "start": 1.36,
                        "end": 1.5,
                        "conf": 0.91
                    },
                    {
                        "word": "test",
                        "start": 1.5,
                        "end": 1.8,
                        "conf": 0.99
                    },
                    {
                        "word": "audio",
                        "start": 1.8,
                        "end": 2.1,
                        "conf": 0.7
                    },
                    {
                        "word": "file.",
                        "start": 2.1,
                        "end": 2.76,
                        "conf": 0.99
                    }
                ],
                "segments": [
                    {
                        "startTime": 0.8184375,
                        "endTime": 2.6746875,
                        "text": "This is the test audio file.",
                        "speaker": "Speaker 0",
                        "sentiment": "neutral"
                    }
                ],
                "summary": "Summary: The audio file is a test recording.\n\nActionable Items: None\n\nInsights: None"
            },
            "translation": {
                "hi": {
                    "text": "यह परीक्षण ऑडियो फ़ाइल है.",
                    "segments": [
                        {
                            "startTime": 0.8184375,
                            "endTime": 2.6746875,
                            "text": "यह परीक्षण ऑडियो फ़ाइल है.",
                            "speaker": "Speaker 0"
                        }
                    ],
                    "summary": "सारांश: ऑडियो फ़ाइल एक परीक्षण रिकॉर्डिंग है।\n\nकार्रवाई योग्य वस्तुएँ: कोई नहीं\n\nअंतर्दृष्टि: कोई नहीं"
                },
                "ar": {
                    "text": "هذا هو ملف الصوت التجريبي.",
                    "segments": [
                        {
                            "startTime": 0.8184375,
                            "endTime": 2.6746875,
                            "text": "هذا هو ملف الصوت التجريبي.",
                            "speaker": "Speaker 0"
                        }
                    ],
                    "summary": "ملخص: الملف الصوتي هو تسجيل اختباري.\n\nالعناصر القابلة للتنفيذ: لا يوجد\n\nرؤى: لا شيء"
                }
            },
            "sentiment_detection": {
                "overall": "neutral"
            }
        }
    }
}

Script

import requests
import neuralspace as ns

# Build the VoiceAI client used for the lookup below.
client = ns.VoiceAI()
# Alternatively, hand the key to the constructor:
# client = ns.VoiceAI(api_key='YOUR_API_KEY')

# ID of the job whose result should be retrieved
target_job_id = "6abe4f35-8220-4981-95c7-3b040d9b86d1"

# Look the job up and print whatever the client returns for it
print(client.get_job_status(target_job_id))

Output

{
    "success": true,
    "message": "Data fetched successfully",
    "data": {
        "timestamp": 1695891454965,
        "filename": "test_audio_en.m4a",
        "jobId": "6abe4f35-8220-4981-95c7-3b040d9b86d1",
        "params": {
            "file_transcription": {
                "language_id": "en",
                "mode": "advanced",
                "number_formatting": "words"
            },
            "translation": {
                "source_language": "en",
                "target_languages": [
                    "hi",
                    "ar"
                ]
            },
            "summarize": true,
            "sentiment_detect": true
        },
        "status": "Completed",
        "audioDuration": 3.232,
        "messsage": "",
        "progress": [
            "Queued",
            "Started",
            "Transcription Started",
            "Speaker Diarization Started",
            "Speaker Diarization Completed",
            "Transcription Completed",
            "Subtitles Started",
            "Subtitles Completed",
            "Sentiment Detection Started",
            "Summarization Started",
            "Translation Started",
            "Translation Completed",
            "Sentiment Detection Completed",
            "Summarization Completed",
            "Completed"
        ],
        "result": {
            "transcription": {
                "transcript": "This is the test audio file.",
                "timestamps": [
                    {
                        "word": "This",
                        "start": 0.8,
                        "end": 1.12,
                        "conf": 0.82
                    },
                    {
                        "word": "is",
                        "start": 1.12,
                        "end": 1.36,
                        "conf": 0.82
                    },
                    {
                        "word": "the",
                        "start": 1.36,
                        "end": 1.5,
                        "conf": 0.91
                    },
                    {
                        "word": "test",
                        "start": 1.5,
                        "end": 1.8,
                        "conf": 0.99
                    },
                    {
                        "word": "audio",
                        "start": 1.8,
                        "end": 2.1,
                        "conf": 0.7
                    },
                    {
                        "word": "file.",
                        "start": 2.1,
                        "end": 2.76,
                        "conf": 0.99
                    }
                ],
                "segments": [
                    {
                        "startTime": 0.8184375,
                        "endTime": 2.6746875,
                        "text": "This is the test audio file.",
                        "speaker": "Speaker 0",
                        "sentiment": "neutral"
                    }
                ],
                "summary": "Summary: The audio file is a test recording.\n\nActionable Items: None\n\nInsights: None"
            },
            "translation": {
                "hi": {
                    "text": "यह परीक्षण ऑडियो फ़ाइल है.",
                    "segments": [
                        {
                            "startTime": 0.8184375,
                            "endTime": 2.6746875,
                            "text": "यह परीक्षण ऑडियो फ़ाइल है.",
                            "speaker": "Speaker 0"
                        }
                    ],
                    "summary": "सारांश: ऑडियो फ़ाइल एक परीक्षण रिकॉर्डिंग है।\n\nकार्रवाई योग्य वस्तुएँ: कोई नहीं\n\nअंतर्दृष्टि: कोई नहीं"
                },
                "ar": {
                    "text": "هذا هو ملف الصوت التجريبي.",
                    "segments": [
                        {
                            "startTime": 0.8184375,
                            "endTime": 2.6746875,
                            "text": "هذا هو ملف الصوت التجريبي.",
                            "speaker": "Speaker 0"
                        }
                    ],
                    "summary": "ملخص: الملف الصوتي هو تسجيل اختباري.\n\nالعناصر القابلة للتنفيذ: لا يوجد\n\nرؤى: لا شيء"
                }
            },
            "sentiment_detection": {
                "overall": "neutral"
            }
        }
    }
}

File Transcription Job with all Features Enabled

File Transcription Full Result

File Transcription Job with all Features Enabled

File Transcription Full Result