sya

split youtube audio tracks, with an optional pyqt gui
git clone git://src.gearsix.net/sya
Log | Files | Refs | Atom | README

sya.py (8334B)


      1 #!/usr/bin/env python3
      2 
      3 # std
      4 import argparse
      5 import subprocess
      6 import re
      7 import os
      8 import sys
      9 
     10 Version = 'v1.3.0'
     11 
     12 Shell = True if sys.platform == 'win32' else False
     13 
     14 UnsafeFilenameChars = re.compile('[/\\?%*:|\"<>\x7F\x00-\x1F]')
     15 TrackNum = re.compile(r'(?:\d+.? ?-? ?)')
     16 Timestamp = re.compile(r'(?: - )?(?:[\t ]+)?(?:[\[\(]+)?((\d+[:.])+(\d+))(?:[\]\)])?(?:[\t ]+)?(?: - )?')
     17 
     18 class TracklistItem:
     19     def __init__(self, timestamp, title):
     20         self.timestamp = timestamp
     21         self.title = title
     22 
     23 
     24 # utilities
     25 def error_exit(msg):
     26     print('exit failure "{}"'.format(msg))
     27     sys.exit()
     28 
     29 def check_bin(*binaries):
     30     for b in binaries:
     31         try:
     32             subprocess.call([b], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL, shell=Shell)
     33         except:
     34             error_exit('failed to execute {}'.format(b))
     35 
     36 # functions
     37 def get_audio(youtubedl, url, outdir, format='mp3', quality='320K', keep=True, ffmpeg=''):
     38     print('Downloading {} ({}, {})...'.format(url, format, quality))
     39     fname = '{}/{}'.format(outdir, os.path.basename(outdir), format)
     40     cmd = [youtubedl, '--newline', '--extract-audio', '--audio-format', format,
     41             '--audio-quality', quality, '-o', fname + '.%(ext)s']
     42     if ffmpeg != '':
     43         cmd.append('--ffmpeg-location')
     44         cmd.append(ffmpeg)
     45     if keep == True:
     46         cmd.append('-k')
     47     cmd.append(url)
     48     p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=Shell)
     49     for line in p.stdout.readlines():
     50         print('    {}'.format(line.decode('utf-8', errors='ignore').strip()))
     51     return '{}.{}'.format(fname, format)
     52 
     53 def load_tracklist(path):
     54     tracklist = []
     55     url = ''
     56     tracklist_file = open(path, mode = 'r', encoding='utf-8', errors='ignore')
     57     for i, t in enumerate(tracklist_file.readlines()):
     58         t = t.strip('\n\t ')
     59         if i == 0:
     60             url = t
     61         else:
     62             tracklist.append(t)
     63     tracklist_file.close()
     64     return url, tracklist
     65 
     66 def parse_tracks(tracklist):
     67     '''
     68     This function parses each line of tracklist and
     69     matches it against the `Timestamp` regex.
     70     There is a heuristic check against timestamps on
     71     the right and left of lines (some track names may
     72     have timestamp-looking elements).
     73     '''
     74     tracks = []
     75     weightR = 0 # num. timestamps on right-side
     76     weightL = 0 # num. timestamps on left-side
     77     for lcount, line in enumerate(tracklist):
     78         if len(line.strip(' ')) == 0:
     79             continue
     80         sline = line.split(' ')
     81         
     82         timestamp = None
     83         for i, l in enumerate(sline):
     84             if i != 0 and i != len(sline)-1:
     85                 continue
     86             elif Timestamp.match(l):
     87                 if timestamp == None or weightR > weightL:
     88                     timestamp = l.strip(' \t[()]')
     89                 if i == 0:
     90                     weightL += 1
     91                 else:
     92                     weightR += 1
     93                 sline.remove(l)
     94         if timestamp == None:
     95             print('line {}, missing timestamp: "{}"'.format(lcount, line))
     96         elif timestamp[0] != '0' and timestamp[1] == ':':
     97             timestamp = '0' + timestamp
     98         
     99         line = ' '.join(sline)
    100         line = re.sub(TrackNum, '', line)
    101         title = re.sub(UnsafeFilenameChars, '', line)
    102         
    103         tracks.append(TracklistItem(timestamp, title))
    104     return tracks
    105 
    106 def missing_times(tracks):
    107     missing = []
    108     for i, t in enumerate(tracks):
    109         if t.timestamp == None:
    110             missing.append(i)
    111     return missing
    112 
    113 def read_tracklen(ffmpeg, track_fpath):
    114     cmd = [ffmpeg, '-v', 'quiet', '-stats', '-i', track_fpath, '-f', 'null', '-']
    115     length = '00:00'
    116     try:
    117         ret = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=Shell)
    118         length = str(ret).split('\\r')
    119         # some nasty string manip. to extract length (printed to stderr)
    120         if sys.platform == 'win32':
    121             length = length[len(length)-2].split(' ')[1].split('=')[1][:-3]
    122         else:
    123             length = length[len(length)-1].split(' ')[1].split('=')[1][:-3]
    124         print('Track length: {}'.format(length))
    125     except:
    126         error_exit('Failed to find track length, aborting.')
    127     return length
    128 
    129 def split_tracks(ffmpeg, audio_fpath, audio_len, tracks, format='mp3', outpath='out'):    
    130     print('Splitting...')
    131     for i, t in enumerate(tracks):
    132         outfile = '{}{}{} - {}.{}'.format(outpath, os.path.sep, str(i+1).zfill(2), t.title.strip(' - '), format)
    133         end = audio_len
    134         if i < len(tracks)-1:
    135             end = tracks[i+1].timestamp
    136         print('     {} ({} - {})'.format(outfile, t.timestamp, end))
    137         cmd = ['ffmpeg', '-nostdin', '-y', '-loglevel', 'error', 
    138             '-i', audio_fpath, '-ss', t.timestamp, '-to', end,
    139             '-acodec', 'copy', outfile]
    140         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=Shell)
    141         for line in p.stdout.readlines():
    142             print('    {}'.format(line.decode('utf-8', errors='ignore').strip()))
    143     return
    144 
    145 # runtime
    146 def parse_args():
    147     parser = argparse.ArgumentParser(
    148         description='download & split audio tracks long youtube videos')
    149     # arguments
    150     parser.add_argument('tracklist', metavar='TRACKLIST', nargs='*',
    151         help='tracklist of title and timestamp information to split audio by')
    152     # options
    153     parser.add_argument('-v', '--version',
    154         action='store_true', default=False, dest='vers',
    155         help='Print version information')
    156     parser.add_argument('-o', '--output',
    157         metavar='PATH', type=str, nargs='?', dest='output',
    158         help='specify the directory to write output files to (default: ./out)')
    159     parser.add_argument('-f', '--format',
    160         type=str, nargs='?', default='mp3', dest='format',
    161         help='specify the --audio-format argument to pass to yt-dlp (default: mp3)')
    162     parser.add_argument('-q', '--quality',
    163         type=str, nargs='?', default='320K', dest='quality',
    164         help='specify the --audio-quality argument to pass to yt-dlp (default: 320K)')
    165     parser.add_argument('--yt-dlp',
    166         metavar='PATH', type=str, nargs='?', dest='youtubedl',
    167         help='path of the "yt-dlp" binary to use')
    168     parser.add_argument('--ffmpeg',
    169         metavar='PATH', type=str, nargs='?', dest='ffmpeg',
    170         help='path of the "ffmpeg" binary to use')
    171     parser.add_argument('-k', '--keep',
    172         action='store_true', default=False, dest='keep',
    173         help='keep any files removed during processing (full video/audio file)')
    174     return parser.parse_args()
    175 
    176 def sya(args):
    177     if args.vers == True:
    178         print(Version)
    179         return
    180         
    181     if args.youtubedl == None:
    182         args.youtubedl = 'yt-dlp.exe' if sys.platform == 'win32' else 'yt-dlp'
    183     if args.ffmpeg == None:
    184         args.ffmpeg = 'ffmpeg.exe' if sys.platform == 'win32' else 'ffmpeg'
    185 
    186     if check_bin(args.youtubedl, args.ffmpeg) == False:
    187         error_exit('required binaries are missing')
    188     if args.output != None and len(args.output) > 0 and args.output[len(args.output)-1] == os.sep:
    189         args.output = args.output[:-1]
    190 
    191     for t in args.tracklist:
    192         if args.tracklist == None or os.path.exists(t) == False:
    193             error_exit('missing tracklist "{}"'.format(t))
    194         url, tracklist = load_tracklist(t)
    195 
    196         output = args.output if args.output != None else os.path.splitext(t)[0]
    197         if args.ffmpeg == 'ffmpeg' or args.ffmpeg == 'ffmpeg.exe':
    198             audio_fpath = get_audio(args.youtubedl, url, output, args.format, args.quality, args.keep, '')
    199         else:
    200             audio_fpath = get_audio(args.youtubedl, url, output, args.format, args.quality, args.keep, args.ffmpeg)
    201         if os.path.exists(audio_fpath) == False:
    202             error_exit('download failed, aborting')
    203 
    204     
    205         tracks = parse_tracks(tracklist)
    206         
    207         missing = missing_times(tracks)
    208         if len(missing) > 0:
    209             error_exit('some tracks are missing timestamps')
    210 
    211         length = read_tracklen(args.ffmpeg, audio_fpath)
    212         os.makedirs(output, exist_ok=True)
    213         split_tracks(args.ffmpeg, audio_fpath, length, tracks, args.format, output)
    214 
    215         if args.keep is False:
    216             os.remove(audio_fpath)
    217 
    218         print('Success')
    219 
# when executed as a script: parse the command line and run the program
if __name__ == '__main__':
    sya(parse_args())
    222