# encoding: utf-8 from unittest2 import TestCase import copy from lxml import etree from babelsubs.parsers.dfxp import DFXPParser from babelsubs.generators.dfxp import DFXPGenerator from babelsubs.generators.srt import SRTGenerator from babelsubs.parsers.base import SubtitleParserError from babelsubs.storage import ( SubtitleSet, get_attr, TTML_NAMESPACE_URI, _cleanup_legacy_namespace, TTML_NAMESPACE_URI_LEGACY ) from babelsubs.tests import utils from babelsubs import load_from SRT_TEXT = u""" 1 00:00:01,004 --> 00:00:04,094 Welkom bij de presentatie é over optellen niveau 2 2 00:00:04,094 --> 00:00:07,054 And the message, with non ascii chars caçao. 3 00:00:09,094 --> 00:00:12,054 We need <i>italics</i> <b>bold</b> <u>underline</u> and speaker change >>Hey . """ class DFXPParsingTest(TestCase): def test_basic(self): subs = utils.get_subs("simple.dfxp") self.assertEquals(len(subs), 76) def test_internal_format(self): subs = utils.get_subs("simple.dfxp") parsed = subs.to_internal() sub_data = [x for x in parsed.subtitle_items()] self.assertEquals(sub_data[0][0], 1200) self.assertEquals(sub_data[0][1], 4467) self.assertEquals(sub_data[3][2], 'at least 7,000 years ago.') def test_self_generate(self): parsed_subs1 = utils.get_subs("simple.dfxp") parsed_subs2 = DFXPParser(DFXPGenerator(parsed_subs1.subtitle_set, 'en').__unicode__()) for x1, x2 in zip([x for x in parsed_subs1.to_internal()], [x for x in parsed_subs2.to_internal()]): self.assertEquals(x1, x2) def test_load_from_string(self): filename = utils.get_data_file_path('simple.dfxp') with open(filename) as f: s = f.read() load_from(s, type='dfxp').to_internal() def test_wrong_format(self): with self.assertRaises(SubtitleParserError): DFXPParser.parse(SRT_TEXT) def test_unsynced_generator(self): subs = SubtitleSet('en') for x in xrange(0,5): subs.append_subtitle(None, None,"%s" % x) output = unicode(DFXPGenerator(subs)) parsed = DFXPParser(output, 'en') internal = parsed.to_internal() subs = [x for x in internal.subtitle_items()] self.assertEqual(len(internal), 5) for i,sub in enumerate(subs): self.assertIsNone(sub[0]) self.assertIsNone(sub[1]) self.assertEqual(sub[2], str(i)) for node in internal.get_subtitles(): self.assertIsNone(get_attr(node, 'begin')) self.assertIsNone(get_attr(node, 'end')) def test_invalid(self): with self.assertRaises(SubtitleParserError): DFXPParser ("this\n\nisnot a valid subs format","en") def test_whitespace(self): subs = utils.get_subs("pre-dmr.dfxp") sub = subs.subtitle_set.subtitle_items(mappings=SRTGenerator.MAPPINGS)[0] self.assertEqual(sub.text, '''Last time, we began talking about\nresonance structures. And I'd like''') def test_equality_ignores_whitespace(self): subs_1 = utils.get_subs('pre-dmr.dfxp').subtitle_set subs_2 = utils.get_subs('pre-dmr-whitespace.dfxp').subtitle_set self.assertEqual(subs_1, subs_2) def test_unsynced(self): sset = utils.get_subs('i-2376.dfxp').subtitle_set self.assertFalse(sset.fully_synced) class LegacyDFXPTest(TestCase): def test_ttfa(self): subs = utils.get_subs("pre-dmr.dfxp") self.assertEquals(len(subs), 419) # make sure the right namespace is in subs.subtitle_set._ttml.tag = '{http://www.w3.org/ns/ttml}tt' self.assertEqual(subs.subtitle_set._ttml.nsmap[None] , TTML_NAMESPACE_URI) subs = utils.get_subs("pre-dmr2.dfxp") self.assertEquals(len(subs), 19) # make sure the right namespace is in subs.subtitle_set._ttml.tag = '{http://www.w3.org/ns/ttml}tt' def test_cleanup_namespace(self): input_string = open(utils.get_data_file_path("pre-dmr.dfxp")).read() cleaned = _cleanup_legacy_namespace(input_string) self.assertEqual(cleaned.find(TTML_NAMESPACE_URI_LEGACY), -1) sset = SubtitleSet(language_code='en', initial_data=cleaned) self.assertEqual(len(sset), 419) class DFXPMergeTest(TestCase): def test_dfxp_merge(self): en_subs = SubtitleSet('en') es_subs = SubtitleSet('es') fr_subs = SubtitleSet('fr') en_subs.append_subtitle(1000, 1500, 'content') es_subs.append_subtitle(1000, 1500, 'spanish content') fr_subs.append_subtitle(1000, 1500, 'french content') correct_xml = """\ <tt xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling" xml:lang=""> <head> <metadata xmlns:ttm="http://www.w3.org/ns/ttml#metadata"> <ttm:title/> <ttm:description/> <ttm:copyright/> </metadata> <styling xmlns:tts="http://www.w3.org/ns/ttml#styling"> <style xml:id="amara-style" tts:color="white" tts:fontFamily="proportionalSansSerif" tts:fontSize="18px" tts:textAlign="center"/> </styling> <layout xmlns:tts="http://www.w3.org/ns/ttml#styling"> <region xml:id="amara-subtitle-area" style="amara-style" tts:extent="560px 62px" tts:padding="5px 3px" tts:backgroundColor="black" tts:displayAlign="after"/> </layout> </head> <body region="amara-subtitle-area"> <div xml:lang="en"> <p begin="00:00:01.000" end="00:00:01.500">content</p></div> <div xml:lang="es"> <p begin="00:00:01.000" end="00:00:01.500">spanish content</p></div> <div xml:lang="fr"> <p begin="00:00:01.000" end="00:00:01.500">french content</p></div> </body> </tt> """ self.assertEquals( DFXPGenerator.merge_subtitles([en_subs, es_subs, fr_subs]), etree.tostring(etree.fromstring(correct_xml)) )