Linux ass2srt

Linux ass2srt

bash script

#! /usr/bin/env bash
#
# Batch-convert every .ass file found directly inside a directory to .srt
# by invoking ./ass2srt.pl (which must be in the current working directory).
#
# Usage: <this script> <directory> <fromEncoding> <toEncoding>
#   directory     directory that holds the .ass files
#   fromEncoding  encoding of the .ass files  (utf16-le | big5 | utf8 | ucs-2le)
#   toEncoding    encoding of the .srt output (utf16-le | big5 | utf8 | ucs-2le)
#
# Exit status: 1 on any argument/setup error, otherwise the status of the
# last command executed.

# Succeeds when $1 is one of the encodings this wrapper accepts.
is_supported_encoding() {
    case "$1" in
        utf16-le|big5|utf8|ucs-2le) return 0 ;;
        *)                          return 1 ;;
    esac
}

if [ $# -ne 3 ]
then
    echo "USAGE: $0 <directory> <fromEncoding> <toEncoding>" >&2
    exit 1
fi

# All expansions are quoted so paths containing spaces survive intact.
if [ -d "$1" ]
then
    echo "Notice: Directory is set $1"
    DIRNAME=$1
else
    echo "Error: $1 is not a valid directory" >&2
    exit 1
fi

if is_supported_encoding "$2"
then
    echo "Notice: ASS2SRT from encoding is set $2"
    ASS2SRT_FROM=$2
else
    echo "Error: ASS2SRT from encoding setting error" >&2
    exit 1
fi

if is_supported_encoding "$3"
then
    echo "Notice: ASS2SRT to encoding is set $3"
    ASS2SRT_TO=$3
else
    echo "Error: ASS2SRT to encoding setting error" >&2
    exit 1
fi

if [ ! -f ./ass2srt.pl ]
then
    echo "Error: ass2srt.pl is not in the current directory" >&2
    exit 1
fi

# Glob inside the target directory instead of parsing `ls` output: the
# paths are already prefixed with the directory (so the -f test looks in the
# right place) and names with whitespace are not split.
for ass_file in "${DIRNAME}"/*.ass
do
    # When nothing matches, the pattern is left unexpanded; skip it.
    [ -f "$ass_file" ] || continue

    srt_file="${ass_file%.*}.srt"
    echo "perl ./ass2srt.pl -f $ASS2SRT_FROM -t $ASS2SRT_TO $ass_file $srt_file"
    perl ./ass2srt.pl -f "$ASS2SRT_FROM" -t "$ASS2SRT_TO" "$ass_file" "$srt_file"
done

ass2srt.pl

#!/usr/bin/perl -w
#
# ass2srt.pl - convert an .ass subtitle file to SubRip (.srt).
#
# This first section loads the modules, parses the command line and resolves
# the input/output file names and encodings used by the rest of the script.
use strict;
use warnings;
#use encoding 'utf-8'; # compilation aborted at
use utf8;                  # Source code is UTF-8
use open ':std', ':utf8';  # STDIN, STDOUT, STDERR are UTF-8.
use Getopt::Long;
use File::Spec;

Getopt::Long::Configure( "pass_through", "no_ignore_case" );

# Default encoding, used for whichever of --from / --to is not given.
my $defaultEnc  = 'utf8';
my $fromEnc     = '';
my $toEnc       = '';
my $help        = 0;
my $preserve    = 0;
my $showVer     = 0;
my $showLicense = 0;
my $dosFormat   = 0;
my $macFormat   = 0;

my $oldTime = "";

# The environment supplies the encodings first; -f / -t below override them.
# Either variable may be unset, in which case the default encoding applies.
$fromEnc = $ENV{'ASS2SRT_FROM'};
$toEnc   = $ENV{'ASS2SRT_TO'};

my $DEBUG = 0;
my $noCorrect = 0;

# GetOptions returns false when an option is malformed (for example -f given
# without a value); show the help text instead of carrying on regardless.
GetOptions(
    "help|h",     \$help,      "from|f=s",    \$fromEnc,
    "to|t=s",     \$toEnc,     "default|d=s", \$defaultEnc,
    "preserve|p", \$preserve,
    "version|v",  \$showVer,   "dos|o",       \$dosFormat,
    "mac|m",      \$macFormat, "license|l",   \$showLicense,
    "debug",      \$DEBUG,     "nocorrect",   \$noCorrect
) or usage();

# Lines read from .ass file will be put here (each in a hash with the
# relevant info) to be written to the .srt file after some post-processing.
my @lines;

# Line terminator written to the .srt file ("\n" unless --dos or --mac).
my $crlf = "\n";

# --dos and --mac are mutually exclusive: report the error and stop.
die( "\n Error: \n\tOption -m|--mac and -o|--dos can not exist together \n" )
    if $dosFormat && $macFormat;

if ($dosFormat) { $crlf = "\r" . $crlf; }
if ($macFormat) { $crlf = "\r"; }

# Program version
my $version = "0.3.0.1";

# Display help message
if ($help) { usage(); }

# Show license information
if ($showLicense) { license(); }

# Show version information
if ($showVer) { showVersion(); }

# If the source file encoding is not specified, fall back to $defaultEnc
if ( !$fromEnc ) { $fromEnc = $defaultEnc; }

# If the target file encoding is not specified, fall back to $defaultEnc
if ( !$toEnc ) { $toEnc = $defaultEnc; }

# The .ass file name (first remaining argument); mandatory.
my $assFile = shift || '';
if ( !$assFile ) { usage(); }

# The .srt file name (second remaining argument). When omitted it is derived
# from the .ass file name by swapping the extension for .srt.
my $srtFile = shift || '';
if ( !$srtFile ) {
    $srtFile = $assFile;

    # Strip only a real extension on the last path component: the dot is
    # escaped and path separators are excluded, so "movie" becomes
    # "movie.srt" (not ".srt") and "dir.d/movie" keeps its directory.
    $srtFile =~ s/\.[^.\/\\]*$//;
    $srtFile .= ".srt";
}

# Report the parameters in use
print "Using parameters: \n";
print "\t .ass File Encoding: $fromEnc\n";
print "\t .SRT File Encoding: $toEnc\n";
print "\n";

# Open the .ass source file
open( my $assFh, "<", $assFile )
    or die 'Can not open ".ass" source file:' . $assFile . ", please check it!!\n";

# Decode the input using the requested source encoding; stop if the
# encoding layer cannot be applied instead of reading undecoded data.
binmode( $assFh, ':encoding(' . $fromEnc . ')' )
    or die "Can not use encoding '$fromEnc' for the \".ass\" source file\n";

# The OS environment variable is used to detect Windows below.
my $OS_TYPE = $ENV{'OS'};

# I/O layer string applied to the .srt file
my $toEncString = ':encoding(' . $toEnc . ')';

# Depending on OS type (only for Windows), fix the .srt file encoding setting.
if ( $OS_TYPE && $OS_TYPE =~ m/windows/i ) {
    print "Operation System is: " . $OS_TYPE
        . ", Checking File Encoding... \n";

    # Only use UTF16 or UCS format when otherwise specified (others need another test)
    if ( ( $toEnc =~ m/utf16/i ) || ( $toEnc =~ m/ucs/i ) ) {
        $toEncString = ':raw' . $toEncString;
    }
}

# Unicode output on Windows needs a BOM. Only the decision is taken here;
# the BOM itself is written further down, once the output handle is open
# (printing it at this point would target an unopened handle and be lost).
my $writeBom = ( $toEncString =~ m/:raw/i ) ? 1 : 0;
if ($writeBom) {
    print "Using unicode encoding, print BOM signature to file: " . $srtFile
        . "\n\n";
}

# Counter handed to / returned by extractLine for each Dialogue line
my $lineNum = 0;

# Read the .ass source line by line
while ( my $row = <$assFh> ) {
    chomp $row;

    if ( $row =~ m/^Dialogue:/ ) {
        # A 'Dialogue:' line: extract its fields into @lines
        $lineNum = extractLine( $lineNum, $row );
    }
    elsif ( $row =~ m/^(Title:|Original)/ ) {
        # 'Title:' / 'Original...' header lines are echoed to the console
        print $row . "\n";
    }
}

# Close the source file
close $assFh;

# Post-process lines to remove errors (skipped with --nocorrect).
if (!$noCorrect)
{
    for (my $i = 0; $i < scalar @lines; $i++)
    {
        my $line = $lines[$i];
        my $prevLine = undef;
        $prevLine = $lines[$i-1] if $i > 0;

        # If the previous line begins at the same time as this line,
        # merge the two lines.
        if ($prevLine && $prevLine->{begin} eq $line->{begin})
        {
            # Merge subtitles, joined with the configured line terminator so
            # that --dos / --mac output does not end up with mixed endings.
            $prevLine->{subtitle} .= $crlf . $line->{subtitle};

            # Remove line $i from the list
            splice( @lines, $i, 1 );

            # Resync things for next test
            $i--;
            $prevLine = undef;
            $prevLine = $lines[$i-1] if $i > 0;
        }

        # If the previous line ends after this line begins (they overlap),
        # cut the previous line short so it ends where this one begins.
        # Times are compared as strings; extractLine zero-pads the hour.
        if ($prevLine && $prevLine->{end} gt $line->{begin})
        {
            $prevLine->{end} = $line->{begin};
        }
    }
}

open( my $srtFh, ">", $srtFile )
    or die 'Can not open the ".srt" purpose of file:' . $srtFile . ", please check!\n";

# Encode the output using the requested target encoding
binmode( $srtFh, $toEncString )
    or die "Can not use encoding '$toEnc' for the \".srt\" file\n";

# Now that the handle exists and has its encoding layer, emit the BOM.
print {$srtFh} "\x{FEFF}" if $writeBom;

# Write SRT file.
my $lineNumber = 1;
foreach my $line (@lines)
{
    # .ass times have 10ms resolution and .srt times 1ms, so a trailing 0
    # is appended to each time stamp.
    my $currentTime = $line->{begin} . "0 --> " . $line->{end} . "0";

    print {$srtFh} $lineNumber . $crlf;
    print {$srtFh} $currentTime . $crlf;
    print {$srtFh} $line->{subtitle} . " " . $crlf . $crlf . $crlf;

    $lineNumber++;
}

# Buffered write errors only surface at close, so check it.
close $srtFh
    or die 'Can not write the ".srt" file:' . $srtFile . ": $!\n";

# Extract data for one "Dialogue:" line of the .ass file.
#
# Parameters:
#   $lineNumber - number of Dialogue lines extracted so far
#   $content    - the raw (chomped) line read from the .ass file
#
# On a successful parse a hash with the keys begin, end, isComment and
# subtitle is pushed onto the file-level @lines array and the incremented
# count is returned. When the line does not have the expected layout nothing
# is stored and $lineNumber is returned unchanged, so the caller's running
# counter is never overwritten with a false value.
sub extractLine {

    my ($lineNumber, $content) = @_;

    # Captures: start time, end time, the field checked against 'comment'
    # below, and the subtitle text (which may itself contain commas).
    # NOTE(review): in the usual .ass Dialogue layout the third captured
    # field is the Style name - confirm that 'comment' styles are intended.
    my ($begin, $end, $isComment, $subtitle) = $content
        =~ m/Dialogue: [^,]*,([^,]*),([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,(.*)$/;

    return $lineNumber unless defined $subtitle;

    $lineNumber++;

    print "\nLine: $lineNumber\n  Begin: [$begin]  End: [$end]  isComment: [$isComment]\n  Subtitle: [$subtitle]\n"
        if $DEBUG;

    # .srt separates seconds and fraction with "," rather than "."
    $begin =~ s/\./,/g;
    $end   =~ s/\./,/g;

    # Pad a single-digit hour to two digits (H:MM:SS -> HH:MM:SS)
    $begin = "0" . $begin if $begin =~ m/^\d:/;
    $end   = "0" . $end   if $end   =~ m/^\d:/;

    # Drop a trailing carriage return so the output line ending is fully
    # controlled by the writer.
    $subtitle =~ s/\r$//;

    # Unless --preserve was given, strip the {...} control blocks.
    # The braces are escaped: an unescaped "{" in a pattern is deprecated.
    if ( !$preserve ) {
        $subtitle =~ s/\{[^}]*\}//g;
    }

    # Lines whose third field is 'comment' are wrapped in parentheses
    if ( $isComment eq 'comment' ) {
        $subtitle = '(' . $subtitle . ')';
    }

    print "\nAfter:\n  Begin: [$begin]  End: [$end]  isComment: [$isComment]\n  Subtitle: [$subtitle]\n"
        if $DEBUG;

    push @lines,
        { begin => $begin, end => $end, isComment => $isComment, subtitle => $subtitle };

    return $lineNumber;
}

# Print the command-line help text to standard output and terminate the
# program with exit status 2. Takes no arguments and never returns.
sub usage {

    # Non-interpolating here-doc: the text holds literal "$" prompts and
    # apostrophes, and nothing in it needs variable expansion.
    print <<'__HELP__';

ass2srt [option] .ass source files [.srt purpose of file]

    The Advanced SubStation Alpha ".ass" file format to SubRip ".srt" format.

Options:
  -h --help               help show this help message

  -d, --default=encoding  default = encoding set the default file encoding, which is the purpose of the source file and use the same code file.
                          The default value is UTF-8 encoding format.

  -f, --from=encoding    from = encoding specified source file. ass coding system used. The set will be covered by the aforementioned pre -
                           Coding based on the settings file, when not specified the default file encoding for the default value.
                         * Tip: You can also use the system environment variables specified ASS2SRT_FROM

  -t, --to=encoding       encoding file specified purposes. srt coding system used. The set will be covered by the aforementioned pre -
                           Coding based on the settings file, when not specified the default file encoding for the default value.
                         * Tip: You can also use the system environment variables specified ASS2SRT_TO

  -p, --preserve          preserve the source file to retain. ass subtitles in the control instructions, write together. srt file.

  -o, --dos               dos specified output file format for DOS (the default format for Unix)
                        * Warning: can not be set this option with -m/--mac together.

  -m, --mac               mac specify the output file format for the Mac (the default format for Unix)
                        * Warning: can not be this option with set -o/--dos together.

  --debug                 print debugging output for each line of the .ass file read.

  --nocorrect             don't attempt to correct errors in subtitles
                        if not specified, corrections will be made for the following errors:
                          - subtitle i+1 begins before subtitle i ends - subtitle i's end will be set to subtitle i+1's begin
                          - subtitle i+1 begins at the same time as subtitle i begins - subtitles will be merged into one subtitle

.ass source file:
    The Subtitles of the original file.
    It can be Advanced SubStation Alpha (.ass) or SubStation Alpha (.ssa) format.

[.srt purpose file]:
    The default SubRip format output filename, the default value is the same as
    the .ass source filename but it's extension changed to .srt.

Operation Example:

  Like iconv utility, use -f to specified from encoding set, the -t to specified target encoding set.

   1. Convert a utf16-le.ass file encoded with Unicode/UTF-16 LE to big5.srt with Big5 encoding.

       myhost  $ perl ass2srt.pl -f utf16-le -t big5 utf16-le.ass big5.srt

   2. Convert a Big5 encoding .ass file to UTF-8 encoding .srt file

       myhost  $ perl ass2srt.pl -f big5 -t utf8 big5.ass utf8.srt

   3. Convert a UTF-8 encoding .ass file to UTF-8 encoding .srt file,
      no -f or -t parameter because UTF-8 is the default encoding setting.

       myhost  $ perl ass2srt.pl utf-8.ass utf-8.srt

   4. Convert a UCS-2 LE encoding .ass file to UCS-2 LE encoding .srt file,
      UCS-2 LE is the same as Unicode encoding on Windows Platform.

       myhost  $ perl ass2srt.pl -f ucs-2le -t ucs-2le ucs-2le.ass ucs-2le.srt

   5. Using the environment variables to specified encoding parameter
      * Warning: Some operation system can not add " or ' quotation marks to the variable's value

       myhost  $ export ASS2SRT_FROM=utf16-le
       myhost  $ export ASS2SRT_TO=big5
       myhost  $ perl ass2srt.pl utf16-le.ass big5.srt

__HELP__
    exit 2;
}

# Print the version banner (program name, the file-level $version and the
# copyright lines) to standard output, then exit with status 2.
sub showVersion {

    my $banner = <<"__VERSION__";

ass2srt $version - Convert subtitles from .ass to .srt format

  Advanced SubStation Alpha subtitle file format conversion tool
  (c) 2006 Ada Hsu, hungwei.hsu (at) gmail (dot) com
  (c) 2009 Jean-Sebastien Guay, jean_seb (at) videotron (dot) ca

__VERSION__

    print $banner;
    exit 2;
}

# Print the license notice (version banner followed by the licensing
# terms) to standard output, then exit with status 2.
sub license {

    my $notice = <<"__LICENSE__";

  ass2srt $version - Convert subtitles from .ass to .srt format

  Advanced SubStation Alpha subtitle file format conversion tool
  (c) 2006 Ada Hsu, hungwei.hsu (at) gmail (dot) com
  (c) 2009 Jean-Sebastien Guay, jean_seb (at) videotron (dot) ca

   This utility use of the software CC-GNU GPL (http://creativecommons.org/licenses/GPL/2.0/)
   authorization, You can view the relevant provisions of
   http://www.gnu.org/copyleft/gpl.html text.

   You are free to use the software, but software maintainer will hold no liability for any damages,
   Thank You.

__LICENSE__

    print $notice;
    exit 2;
}

=========== End

原文地址:https://www.cnblogs.com/lsgxeva/p/11609367.html

时间: 2024-11-12 14:15:59

Linux ass2srt的相关文章

排查Linux机器是否已经被入侵

随着开源产品的越来越盛行,作为一个Linux运维工程师,能够清晰地鉴别异常机器是否已经被入侵了显得至关重要,个人结合自己的工作经历,整理了几种常见的机器被黑情况供参考 背景信息:以下情况是在CentOS 6.9的系统中查看的,其它Linux发行版类似 1.入侵者可能会删除机器的日志信息,可以查看日志信息是否还存在或者是否被清空,相关命令示例: [[email protected] ~]# ll -h /var/log/* -rw-------. 1 root root 2.6K Jul 7 18

linux下Nginx配置文件(nginx.conf)配置设置详解(windows用phpstudy集成)

linux备份nginx.conf文件举例: cp /usr/local/nginx/nginx.conf /usr/local/nginx/nginx.conf-20171111(日期) 在进程列表里 面找master进程,它的编号就是主进程号. ps -ef | grep nginx 查看进程 cat /usr/local/nginx/nginx.pid 每次修改完nginx文件都要重新加载配置文件linux命令: /usr/local/nginx -t //验证配置文件是否合法 若ngin

Linux下WebSphereV8.5.5.0 安装详细过程

Linux下WebSphereV8.5.5.0 安装详细过程 自WAS8以后安装包不再区别OS,一份介质可以安装到多个平台.只针对Installation Manager 进行了操作系统的区分 ,Websphere产品介质必须通过专门的工具Installation Manager安装.进入IBM的官网http://www.ibm.com/us/en/进行下载.在云盘http://yun.baidu.com/share/link?shareid=2515770728&uk=4252782771 中是Linu

Linux centos下让alias自定义永久生效(+常用的别名)

alias可以简化一些复杂的命令串,使一个单词或简化后的命令即可实现复杂(通常是带很多参数的长串)命令. 基本用法: alias 简化命令='实际的长串命令'    //实际长串命令通常为'原命令 -/选项参数' eg. alias ll='ls -a' 获取别名: alias        //即可查看当前设定的所有alias别名 取消别名: unalias 简化命令 eg. unalias ll            //取消ll的别名 永久生效: 直接使用alias命令定义的别名,重启后就

Linux下修改环境变量PATH

1.什么是环境变量(PATH) 在Linux中,在执行命令时,系统会按照PATH的设置,去每个PATH定义的路径下搜索执行文件,先搜索到的文件先执行. 我们知道查阅文件属性的指令ls 完整文件名为:/bin/ls(这是绝对路径), 那你会不会觉得很奇怪:"为什么我可以在任何地方执行/bin/ls这个指令呢? " 为什么我在任何目录下输入 ls 就一定可以显示出一些讯息而不会说找不到该 /bin/ls 指令呢? 这是因为环境变量 PATH 的帮助所致呀! 当我们在执行一个指令的时候,举例

老男孩Linux运维第41期20170917开班第四周学习重点课堂记录

第1章 必知必会文件 配置文件位置 该文件作用 /etc/sysconfig/network-scripts/ifcfg-eth0 第一块网卡的配置文件 同setup中的network /etc/resolv.conf 客户端DNS配置文件,优先级低于网卡配置文件 /etc/hosts 主要作用是定义IP地址和主机名的映射关系(域名解析),是一个映射IP地址和主机名的规定 /etc/sysconfig/network 用于配置hostname和networking /etc/fstab 开机自动

Linux基础命令小结

注意:Linux严格区分大小写 老男孩方法论经验之谈: 有一种方法叫做没方法 有两种方法,左右为难 有三种方法才叫有方法 停止一个命令:CTR + C 1.创建目录 英文:make directorys 命令:mkdir 实例:三种方式 mkdir /data cd / mkdir data cd /;mkdir data mkdir x y z    表示同时创建多个目录 mkdir -p /data/x/y   表示同时创建多级目录(递归创建),切记不可用mkdir /data/x/y 2.

Linux red hat 安装ansible

今日对Linux 系统是Red Hat Enterprise Linux Server release 6.5 (Santiago)对ansible进行安装. 由于系统的源为yum源,所以使用yum install ansible 进行安装,但是报错.如图.(这个错误是yum源没有注册到red hat 系统). yum源不能安装,所以换了一个思路.使用pip安装.pip是依赖python安装的. 1.检查Python版本 Python -v 检查出来为Python 2.6.6 2.检查pip 版

初识运维3--在虚拟机中安装Linux发行版系统(CentOS)的方法

在讲Linux系统发行版本的安装过程之前,先大略说明一下虚拟化. 虚拟化:将底层硬件资源抽象为用户更容易读懂和使用的逻辑抽象层的技术. 最早由IBM提出,现使用率较高的虚拟化软件平台有三类:VMware workstation.VirtualBOX.HyperV.在这里使用VMware workstation作为例子讲解说明安装过程. 虚拟化网络: 桥接模式:让物理机和虚拟机利用物理网络接口完成通信.虚拟机可以访问互联网. 仅主机模式:让虚拟机和物理机利用被虚拟出来的VMnet1网络接口完成通信