# For copyright and license terms, see COPYRIGHT.rst (top level of repository)
# Repository: https://github.com/C3S/collecting_society_web
import os
import shutil
import logging
import magic
import hashlib
import uuid
import re
import datetime
import time
from cgi import FieldStorage
from webob.byterange import ContentRange
from pyramid.security import NO_PERMISSION_REQUIRED
from pyramid.authorization import (
Allow,
DENY_ALL
)
from pyramid.httpexceptions import HTTPInternalServerError
from cornice import Service
from portal_web.services import (
benchmark,
csv_export,
csv_import
)
from portal_web.models import (
Tdb,
WebUser,
Checksum
)
from collecting_society_web.models import Content
from ...services import _
from ...services.lossless_audio_formats import (
lossless_audio_extensions,
lossless_audio_mimetypes
)
from ...services.sheet_music_formats import (
sheet_music_extensions,
sheet_music_mimetypes
)
log = logging.getLogger(__name__)
mime = magic.Magic(mime=True)
_prefix = 'repertoire'
# --- configuration -----------------------------------------------------------
_path_temporary = 'temporary'
_path_uploaded = 'uploaded'
_path_rejected = 'rejected'
_path_previews = 'previews'
_hash_algorithm = hashlib.sha256
_checksum_algorithm = hashlib.sha256
_checksum_postfix = '.checksums'
# --- methods -----------------------------------------------------------------
[docs]
def get_cors_policy():
return {
'origins': os.environ['WEBAPI_CORS'].split(","),
'credentials': True
}
[docs]
def get_acl(request):
# no webuser logged in
if not request.web_user:
return [DENY_ALL]
# webuser logged in
return [
(
Allow,
request.unauthenticated_userid,
('create', 'read', 'update', 'delete')
),
DENY_ALL
]
[docs]
def get_hostname():
return 'processing'
[docs]
def get_url(url, version, action, content_id):
if url.endswith('/'):
url = url[:-1]
return '/'.join([str(url), str(version), str(action), str(content_id)])
[docs]
def get_path(request, directory, filename=None):
webuser_directory = str(request.web_user.id)
# special case: for previews use content base path
if directory == _path_previews:
content_base_directory = (
request.registry.settings['api.repertoire.content_base_path'])
if not filename:
return os.path.join(content_base_directory, directory,
webuser_directory)
return os.path.join(content_base_directory, directory,
webuser_directory, filename)
else:
storage_base_directory = (request.registry.settings[
'api.repertoire.storage_base_path'])
if not filename:
return os.path.join(storage_base_directory, directory,
webuser_directory)
return os.path.join(storage_base_directory, directory,
webuser_directory, filename)
[docs]
def cleanup_temp_directory(request):
storage_base_directory = (request.registry.settings[
'api.repertoire.storage_base_path'])
temp_directory = os.path.join(storage_base_directory, _path_temporary)
# filter for certain file patterns
uuidrule = '^[0-9A-Fa-f]{64}(|.checksums)$'
uuidhex = re.compile(uuidrule, re.I)
# walk through the temporary directory structure
now = time.time()
expire_seconds = now - int(request.registry.settings[
'api.repertoire.tempfile_expire_days']) * 86400
for root, dirs, files in os.walk(temp_directory):
level = root.replace(temp_directory, '').count(os.sep)
if level == 1:
for tmpfile in files:
# only delete files we have created
if uuidhex.match(tmpfile) is not None:
tmpfilepath = os.path.join(root, tmpfile)
if os.path.isfile(tmpfilepath):
if os.stat(tmpfilepath).st_mtime < (expire_seconds):
try:
os.remove(tmpfilepath)
log.info(("removed abandoned temporary file"
" '%s'\n") % (tmpfile))
except IOError:
log.info(("couldn't remove abandoned temporary"
" file '%s'\n") % (tmpfile))
finally:
pass
# TODO: delete user directories, if empty
[docs]
def get_content_info(request, content):
if content.category == 'sheet': # pdf
return {
'name': content.name,
'size': content.size,
'extension': content.extension,
'type': content.mime_type,
'uuid': content.uuid,
'category': content.category,
'processing_state': content.processing_state,
'rejection_reason': content.rejection_reason,
'rejection_reason_details': content.rejection_reason_details,
'deleteUrl': get_url(
url=request.registry.settings['api.repertoire.url'],
version=request.registry.settings['api.repertoire.version'],
action='delete',
content_id=content.id
),
'deleteType': 'GET'
}
if content.category == 'audio': # lossless audio
return {
'name': content.name,
'size': content.size,
'extension': content.extension,
'type': content.mime_type,
'uuid': content.uuid,
'category': content.category,
'duration': content.length and "{:.0f}:{:02.0f}".format(
*divmod(int(content.length), 60)
) or 0,
'channels': 'mono' if content.channels == 1 else 'stereo',
'sample_width': content.sample_width and '{:d} bit'.format(
content.sample_width) or 0,
'frame_rate': content.sample_rate and '{:d} Hz'.format(
content.sample_rate) or 0,
'preview_processed': bool(content.preview_path),
'metadata_artist': content.metadata_artist,
'metadata_title': content.metadata_title,
'metadata_release': content.metadata_release,
'metadata_release_date': content.metadata_release_date,
'metadata_track_number': content.metadata_track_number,
'processing_state': content.processing_state,
'rejection_reason': content.rejection_reason,
'rejection_reason_details': content.rejection_reason_details,
'previewUrl': get_url(
url=request.registry.settings['api.repertoire.url'],
version=request.registry.settings['api.repertoire.version'],
action='preview',
content_id=content.id
),
'deleteUrl': get_url(
url=request.registry.settings['api.repertoire.url'],
version=request.registry.settings['api.repertoire.version'],
action='delete',
content_id=content.id
),
'deleteType': 'GET'
}
[docs]
def create_path(path):
try:
os.makedirs(path)
except IOError:
pass
return os.path.exists(path)
[docs]
def delete_file(absolute_path):
# check file
if not os.path.isfile(absolute_path):
return True
# delete file
try:
os.remove(absolute_path)
except IOError:
pass
# admin feedback
log.info(("file deleted: %s\n") % (absolute_path))
return not os.path.isfile(absolute_path)
[docs]
def move_file(source, target):
# check file
if not os.path.isfile(source):
return False
if os.path.isfile(target):
return False
# move file
try:
os.rename(source, target)
except IOError:
pass
return (os.path.isfile(target) and not os.path.isfile(source))
[docs]
def create_paths(request):
for subpath in [_path_temporary,
_path_uploaded,
_path_previews,
_path_rejected]:
path = get_path(request, subpath)
if not os.path.exists(path):
create_path(path)
[docs]
def move_files_with_prefixes(source, target):
ok = True
for postfix in ['', _checksum_postfix]:
ok = (ok and move_file(source + postfix, target + postfix))
return ok
[docs]
def delete_files_with_identifiers(request, identifiers):
for subpath in [_path_temporary,
_path_uploaded,
_path_previews,
_path_rejected]:
for identifier in identifiers:
for postfix in ['', _checksum_postfix]:
path = get_path(request, subpath, identifier + postfix)
if os.path.isfile(path):
delete_file(path)
[docs]
def panic(request, reason, identifiers):
# admin feedback
# 2DO: mail
log.info(
(
"Panic: %s\n"
"Deleting all files:\n"
"- User %s\n"
"- Identifiers %s\n"
) % (
reason, request.web_user, identifiers
)
)
delete_files_with_identifiers(request, identifiers)
raise HTTPInternalServerError
[docs]
def save_upload_to_fs(descriptor, absolute_path, contentrange=None):
# chunked file
if contentrange:
begin, end, length = contentrange
complete = False
# check file
size = 0
if os.path.isfile(absolute_path):
size = os.path.getsize(absolute_path)
if end == length:
complete = True
if begin != size:
# admin feedback
log.info(
(
'Uploaderror: wrong chunkposition '
'(file %s, begin: %s, size: %s).'
) % (
absolute_path, begin, size
)
)
return (False, None)
# whole file
else:
complete = True
descriptor.seek(0)
end = descriptor.tell()
descriptor.seek(0)
# check file
if os.path.isfile(absolute_path):
# admin feedback
log.info(
(
'Uploaderror: file already exists (%s).'
) % (
absolute_path
)
)
return (False, None)
# save
try:
with open(absolute_path, 'a') as f:
shutil.copyfileobj(descriptor, f)
except IOError:
raise HTTPInternalServerError
return (
os.path.getsize(absolute_path) == end,
complete
)
[docs]
@Tdb.transaction(readonly=False)
def save_upload_to_db(content):
contents = Content.create([content])
if not len(contents) == 1:
return False
return contents[0]
[docs]
def validate_upload(filename, absolute_path):
# check filetype
extension = os.path.splitext(filename)[1]
if extension:
extension = extension[1:]
if (extension not in lossless_audio_extensions() and
extension not in sheet_music_extensions()):
return _('Filetype not allowed.')
# check mimetype
mimetype = mime.from_file(absolute_path)
if (mimetype not in lossless_audio_mimetypes() and
mimetype not in sheet_music_mimetypes()):
return _('Mimetype not allowed.')
[docs]
def get_category_from_mimetype(absolute_path):
mimetype = mime.from_file(absolute_path)
if mimetype in lossless_audio_mimetypes():
return 'audio'
if mimetype in sheet_music_mimetypes():
return 'sheet'
return None
[docs]
def create_checksum(descriptor, algorithm=hashlib.sha256):
checksum = algorithm(descriptor.read())
descriptor.seek(0)
return checksum
[docs]
def save_checksum(path, algorithm, checksum, contentrange=None):
begin, end, _ = contentrange
csv_export(
path=path,
fieldnames=[
'begin',
'end',
'algorithm',
'checksum'
],
row={
'begin': begin,
'end': end,
'algorithm': algorithm,
'checksum': checksum
}
)
[docs]
def save_checksums_to_db(content, path):
checksums = []
timestamp = datetime.datetime.now()
rows = csv_import(path)
for row in rows:
checksums.append({
'origin': 'content,%s' % (content.id),
'code': str(row['checksum']),
'timestamp': timestamp,
'algorithm': str(row['algorithm']),
'begin': int(row['begin']),
'end': int(row['end'])
})
Checksum.create(checksums)
[docs]
def is_collision(contentrange, checksum):
begin, end, length = contentrange
collisions = Checksum.search_collision(
code=checksum.hexdigest(),
algorithm=_checksum_algorithm.__name__,
begin=begin,
end=end
)
if not collisions:
return False
return True
[docs]
def get_content_uuid():
while True:
content_uuid = str(uuid.uuid4())
if not Content.search_by_uuid(content_uuid):
break
return content_uuid
[docs]
def raise_abuse_rank(request):
rank_max = int(request.registry.settings['abuse_rank.max'])
current_rank = request.session['abuse_rank']['current']
current_rank = (current_rank + 1) % (rank_max + 1)
request.session['abuse_rank']['current'] = current_rank
log.debug(
(
"raised session abuse rank of user %s to %s\n"
) % (
request.user, request.session['abuse_rank']['current']
)
)
[docs]
def ban(request):
request.session['abuse_rank']['banned'] = True
request.session['abuse_rank']['bantime'] = time.time()
web_user = WebUser.current_web_user(request)
if not web_user.abuse_rank:
web_user.abuse_rank = 0
web_user.abuse_rank += 1
web_user.save()
log.info(
(
"banned upload for user %s (db abuse rank: %s)\n"
) % (
web_user, web_user.abuse_rank
)
)
[docs]
def is_banned(request):
if 'abuse_rank' not in request.session:
request.session['abuse_rank'] = {
'current': 0, 'banned': False, 'bantime': None
}
banned = request.session['abuse_rank']['banned']
if not banned:
return False
currenttime = time.time()
bantime = int(request.session['abuse_rank']['bantime'])
removeban = int(request.registry.settings['abuse_rank.removeban'])
if currenttime > bantime + removeban:
request.session['abuse_rank']['banned'] = False
request.session['abuse_rank']['current'] = 0
web_user = WebUser.current_web_user(request)
log.debug(
(
"removed upload ban for user %s (db abuse rank: %s)\n"
) % (
web_user, web_user.abuse_rank
)
)
return request.session['abuse_rank']['banned']
[docs]
def still_banned_for(request):
if 'abuse_rank' not in request.session:
return 0
banned = request.session['abuse_rank']['banned']
if not banned:
return 0
currenttime = time.time()
bantime = int(request.session['abuse_rank']['bantime'])
removeban = int(request.registry.settings['abuse_rank.removeban'])
seconds_still_banned_for = bantime + removeban - currenttime
if seconds_still_banned_for <= 0:
return 0
return seconds_still_banned_for
# --- resources ---------------------------------------------------------------
[docs]
class UserResource(object):
def __init__(self, request):
self.request = request
self.readonly = False
# triggered by ContextFound event to load resources after traversal
def _context_found(self):
if not self.readonly:
self._context_found_writable()
else:
self.context_found()
# wrapping function needed for writable transaction decorator
@Tdb.transaction(readonly=False)
def _context_found_writable(self):
self.context_found()
[docs]
def context_found(self):
pass
def __acl__(self):
# no webuser logged in
if not self.request.web_user:
return [DENY_ALL]
# webuser logged in
return [
(
Allow,
self.request.unauthenticated_userid,
('create', 'read', 'update', 'delete')
),
DENY_ALL
]
# --- service: upload ---------------------------------------------------------
repertoire_upload = Service(
name=_prefix + 'upload',
path=_prefix + '/v1/upload',
description="uploads repertoire files",
cors_policy=get_cors_policy(),
factory=UserResource
)
[docs]
@repertoire_upload.options(
permission=NO_PERMISSION_REQUIRED)
def options_repertoire_upload(request):
response = request.response
# cors headers explicitly not set for OPTIONS by cornice,
# but required for the jQuery File Upload plugin
response.headers['Access-Control-Allow-Headers'] = get_cors_headers()
return response
[docs]
@repertoire_upload.post(
permission='create')
@Tdb.transaction(readonly=False)
def post_repertoire_upload(request):
# create paths
create_paths(request)
# upload files
files = []
for name, fieldStorage in request.POST.items():
# check fieldStorage
if not isinstance(fieldStorage, FieldStorage):
continue
# configure upload
rank = (request.registry.settings['abuse_rank.active'] == 'true')
rank_max = int(request.registry.settings['abuse_rank.max'])
hostname = get_hostname()
descriptor = fieldStorage.file
filename = os.path.basename(fieldStorage.filename).encode('utf-8')
filename_hash = _hash_algorithm(filename).hexdigest()
temporary_path = get_path(request, _path_temporary, filename_hash)
contentrange = ContentRange.parse(
request.headers.get('Content-Range', None)
)
contentlength = request.headers.get('Content-Length', None)
# create checksum
with benchmark(request, name='checksum', uid=filename,
normalize=descriptor, scale=100*1024*1024):
checksum = create_checksum(
descriptor=descriptor,
algorithm=_checksum_algorithm
)
save_checksum(
path=temporary_path + _checksum_postfix,
algorithm=_checksum_algorithm.__name__,
checksum=checksum.hexdigest(),
contentrange=contentrange or (0, contentlength, contentlength)
)
# abuse rank
if rank:
if is_banned(request):
# TODO: number wont be replaced, also see
# BirthdateField line 300+ in register_webuser.py
files.append({
'name': fieldStorage.filename,
'error': _(
"Abuse detected. Wait for {number}"
" seconds before trying another"
" upload.",
mapping={'number': int(still_banned_for(request))}
)})
continue
if is_collision(contentrange, checksum):
raise_abuse_rank(request)
current_rank = request.session['abuse_rank']['current']
if current_rank == rank_max:
ban(request)
# save to filesystem (-> temporary)
ok, complete = save_upload_to_fs(
descriptor=descriptor,
absolute_path=temporary_path,
contentrange=contentrange
)
if not ok:
pass
if not complete:
# client feedback
files.append({
'name': fieldStorage.filename,
'size': os.path.getsize(temporary_path)
})
continue
# get content uuid
content_uuid = get_content_uuid()
# get uuid paths
uploaded_path = get_path(request, _path_uploaded, content_uuid)
rejected_path = get_path(request, _path_rejected, content_uuid)
file_category = get_category_from_mimetype(temporary_path)
file_size = os.path.getsize(temporary_path)
mime_type = str(mime.from_file(temporary_path))
# validate file
error = validate_upload(filename, temporary_path)
if error:
# move files (temporary -> rejected)
ok = move_files_with_prefixes(
source=temporary_path, target=rejected_path
)
if not ok:
panic(
request,
reason="Files could not be moved.",
identifiers=[filename_hash, content_uuid]
)
# save file to database
_content = {
'uuid': content_uuid,
'processing_hostname': hostname,
'processing_state': "rejected",
'rejection_reason': "format_error",
'entity_origin': "direct",
'entity_creator': WebUser.current_web_user(request).party,
'name': str(name),
'category': file_category,
'mime_type': mime_type,
'size': file_size,
'path': rejected_path
}
content = save_upload_to_db(_content)
if not content:
panic(
request,
reason="Content could not be created.",
identifiers=[filename_hash, content_uuid]
)
# save checksums to database
# admin feedback
# 2DO: Mail
log.info(
(
"Content rejected (format error): %s\n"
) % (
rejected_path
)
)
# client feedback
files.append({
'name': fieldStorage.filename,
'error': error
})
continue
# we used to create a preview, now done in repertoire processing
# this is only for displaying some file properties
# audio = AudioSegment.from_file(temporary_path)
file_category = get_category_from_mimetype(temporary_path)
# move files (temporary -> uploaded)
ok = move_files_with_prefixes(
source=temporary_path, target=uploaded_path
)
if not ok:
panic(
request,
reason="Files could not be moved.",
identifiers=[filename_hash, content_uuid]
)
# save file to database
_content = {
'uuid': content_uuid,
'processing_hostname': hostname,
'processing_state': "uploaded",
'entity_origin': "direct",
'entity_creator': WebUser.current_web_user(request).party,
'name': str(filename),
'category': file_category,
'mime_type': str(mime.from_file(uploaded_path)),
'size': os.path.getsize(uploaded_path),
'path': uploaded_path,
# 'length': "%.6f" % audio.duration_seconds,
# 'channels': int(audio.channels),
# 'sample_rate': int(audio.frame_rate),
# 'sample_width': int(audio.sample_width * 8)
}
content = save_upload_to_db(_content)
if not content:
panic(
request,
reason="Content could not be created.",
identifiers=[filename_hash, content_uuid]
)
# save checksums to database
save_checksums_to_db(
content=content,
path=uploaded_path + _checksum_postfix
)
# client feedback
files.append(get_content_info(request, content))
# finally, see if there are old temporary files in the temp folder
# structure
cleanup_temp_directory(request)
# TODO: add timestamp file in temp folder to track if cleanup run
# was already started this day
return {'files': files}
# --- service: list -----------------------------------------------------------
repertoire_list = Service(
name=_prefix + 'list',
path=_prefix + '/v1/list',
description="lists repertoire files",
cors_policy=get_cors_policy(),
factory=UserResource
)
[docs]
@repertoire_list.options(
permission=NO_PERMISSION_REQUIRED)
def options_repertoire_list(request):
return
[docs]
@repertoire_list.get(
permission='read')
def get_repertoire_list(request):
return {'files': []}
# --- service: show -----------------------------------------------------------
repertoire_show = Service(
name=_prefix + 'show',
path=_prefix + '/v1/show/{filename}',
description="checks partially uploaded files",
cors_policy=get_cors_policy(),
factory=UserResource
)
[docs]
@repertoire_show.options(
permission=NO_PERMISSION_REQUIRED)
def options_repertoire_show(request):
return
[docs]
@repertoire_show.get(
permission='read')
def get_repertoire_show(request):
filename = request.matchdict['filename'].encode('utf-8')
if not filename:
return {}
filename_hash = _hash_algorithm(filename).hexdigest()
absolute_path = get_path(request, _path_temporary, filename_hash)
if not os.path.isfile(absolute_path):
return {}
return {
'name': filename,
'size': os.path.getsize(absolute_path)
}
# --- service: preview --------------------------------------------------------
repertoire_preview = Service(
name=_prefix + 'preview',
path=_prefix + '/v1/preview/{id}',
description="previewed the uploaded repertoire files",
cors_policy=get_cors_policy(),
factory=UserResource
)
[docs]
@repertoire_preview.options(
permission=NO_PERMISSION_REQUIRED)
def options_repertoire_preview(request):
return
[docs]
@repertoire_preview.get(
permission='read')
def get_repertoire_preview(request):
return
# --- service: delete ---------------------------------------------------------
repertoire_delete = Service(
name=_prefix + 'delete',
path=_prefix + '/v1/delete/{id}',
description="deletes uploaded repertoire files",
cors_policy=get_cors_policy(),
factory=UserResource
)
[docs]
@repertoire_delete.options(
permission=NO_PERMISSION_REQUIRED)
def options_repertoire_delete(request):
return
[docs]
@repertoire_delete.get(
permission='delete')
@Tdb.transaction(readonly=False)
def get_repertoire_delete(request):
return