Source code for stash_vroom.jav

# Copyright 2025 Zyquo Onrel
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This module provides utility functions for working with JAV files and filenames.
"""

import os
import re
import logging

from . import util
from . import slr

log = logging.getLogger(__name__)

[docs] def get_is_jav(filepath): """ Determine if the given file is a JAV (Japanese Adult Video) file. :param filepath: A filename or file path :type filepath: str :return: `True` if the file is a JAV, `False` otherwise. """ return not not get_jav_info(filepath)
[docs] def get_jav_info(filepath): filename = os.path.basename(filepath) mtch = match_jav_filename(filename) if not mtch: #log.debug(f' - Not JAV; no regex match') return None years = [ str(X) for X in range(2010, 2030) ] if mtch[3] in years: # log.info(f' - Not JAV for year match {repr(filename)}: "{mtch[1]}" "{mtch[2]}" {mtch[3]}') return None part = mtch[5] if mtch[5] else '' part = re.sub(r'^part', '', part, flags=re.IGNORECASE) part = part.upper() # log.debug(f' - JAV match {repr(filename)}: "{mtch[1]}" "{mtch[2]}" {mtch[3]} {repr(mtch[4])} {repr(part)}') return {'studio':mtch[1].upper(), 'id':mtch[3], 'mid':mtch[2], 'filename':filename, 'part':part}
[docs] def match_jav_filename(filename): if not filename or not isinstance(filename, str): raise ValueError(f'Invalid filename: {filename}') ok_extension_re = util.get_vid_re() slr_info = slr.get_slr_info(filename) if slr_info: #log.debug(f' - Not JAV; matched slr: {slr_info}') return None if filename.endswith('-181_180x180_3dh_LR.mp4'): #log.debug(f' - Not JAV; matched suffix') return None prefixes = ['SLR-', 'SLR_', 'JillVR_', 'realhotvr-', 'wankzvr-', 'reality-lovers-', 'sexbabesvr-', 'only2xvr-'] for prefix in prefixes: if filename.startswith(prefix): #log.debug(f' - Not JAV; matched prefix: {prefix}') return None fname = filename # Fix all the WVR variants first. #fname = re.sub(r'^wvr0(\d)' , r'WVR1-\1', fname, flags=re.IGNORECASE) fname = re.sub(r'^2DSVR\b' , r'DSVR' , fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR-11-(\d\d\d)' , r'WVR1-\1', fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR-2-(\d\d\d)' , r'WVR1-\1', fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR-101(\d\d\d)' , r'WVR1\1' , fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR-11(\d\d\d)' , r'WVR1\1' , fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR-2(\d)' , r'WVR1\1' , fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR5D-' , r'WVR6-' , fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR5-D(\d)' , r'WVR6-\1', fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR5D(\d\d\d)' , r'WVR6\1' , fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR7(\d\d\w)\b' , r'WVR80\1',fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR-9(\d\d\d)' , r'WVR8\1', fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR-9(\d\d\d)' , r'WVR8\1', fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR8d\b' , r'WVR9' , fname, flags=re.IGNORECASE) fname = re.sub(r'^WVR-91(\d\d\d)' , r'WVR9\1', fname, flags=re.IGNORECASE) # Fix all the rest. fname = re.sub(ok_extension_re , '' , fname, flags=re.IGNORECASE) fname = re.sub(r'[-_\b]mkx199' , '' , fname, flags=re.IGNORECASE) fname = re.sub(r'[-_\b]mkx219' , '' , fname, flags=re.IGNORECASE) fname = re.sub(r'[-_]*(299|320|640|720|\d\d\d\d)p', '' , fname, flags=re.IGNORECASE) fname = re.sub(r'179-SBS\b' , '' , fname, flags=re.IGNORECASE) fname = re.sub(r'179_LR\b' , '' , fname, flags=re.IGNORECASE) fname = re.sub(r'_\d+-SLR\b' , '' , fname, flags=re.IGNORECASE) # SLR downloads #fname = re.sub(r'abc122', '', fname) #log.debug(f'-- fname = {repr(fname)}') connector_re = r'[-_0\s\.]*' part_re = f'({connector_re})' + r'(\d{0,2}\b|[a-z]\b|part\d+)' jav_re = ('' + r'\b' #+ r'(2?[a-z]{3,9}(?:6|6D|8|9D)?)' #+ r'(2?[a-z]{3,9})' #+ r'(WVR0|WVR4|WVR8|WVR9|WVR6)' + r'(WVR0|WVR4|WVR8|WVR9|WVR6|[a-z]{3,9})' + f'({connector_re})' + r'(\d{2,6})' + r'(?:' + part_re + r')?' ) mtch = re.search(jav_re, fname, re.IGNORECASE) return mtch