2 * @brief Extract metadata using libextractor.
4 /* Copyright (C) 2020 Parth Kapadia
5 * Copyright (C) 2022,2023 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 #include <extractor.h>
30 /** Store metadata in its corresponding variable.
32 * @param cls last parameter from EXTRACTOR_extract (unused)
33 * @param plugin_name name of the plugin (unused)
34 * @param type kind of metadata found (an EXTRACTOR_MetaType value)
35 * @param format format information about data
36 * @param data_mime_type mimetype according to libextractor (unused)
37 * @param data actual meta-data found
38 * @param data_len number of bytes in data
41 process_metadata(void*,
43 enum EXTRACTOR_MetaType type
,
44 enum EXTRACTOR_MetaFormat format
,
50 case EXTRACTOR_METAFORMAT_UTF8
:
54 // specific encoding unknown
55 // EXTRACTOR_METAFORMAT_UNKNOWN
56 // EXTRACTOR_METAFORMAT_BINARY
57 // EXTRACTOR_METAFORMAT_C_STRING
61 // "data_len is strlen (data)+1"!
65 case EXTRACTOR_METATYPE_BOOK_TITLE
:
66 case EXTRACTOR_METATYPE_JOURNAL_NAME
:
67 case EXTRACTOR_METATYPE_ORIGINAL_TITLE
:
68 case EXTRACTOR_METATYPE_SUBJECT
:
69 case EXTRACTOR_METATYPE_SUBTITLE
:
70 case EXTRACTOR_METATYPE_TITLE
:
71 send_field(FIELD_TITLE
, data
, data_len
);
74 case EXTRACTOR_METATYPE_PAGE_COUNT
: {
76 if (parse_unsigned(data
, p
)) {
77 send_field_page_count(int(p
));
82 case EXTRACTOR_METATYPE_ARTIST
:
83 case EXTRACTOR_METATYPE_AUTHOR_NAME
:
84 case EXTRACTOR_METATYPE_COMPOSER
:
85 case EXTRACTOR_METATYPE_CONDUCTOR
:
86 case EXTRACTOR_METATYPE_CREATOR
:
87 case EXTRACTOR_METATYPE_MOVIE_DIRECTOR
:
88 case EXTRACTOR_METATYPE_ORIGINAL_ARTIST
:
89 case EXTRACTOR_METATYPE_ORIGINAL_PERFORMER
:
90 case EXTRACTOR_METATYPE_ORIGINAL_WRITER
:
91 case EXTRACTOR_METATYPE_PERFORMER
:
92 case EXTRACTOR_METATYPE_WRITER
:
93 send_field(FIELD_AUTHOR
, data
, data_len
);
96 case EXTRACTOR_METATYPE_KEYWORDS
:
97 send_field(FIELD_KEYWORDS
, data
, data_len
);
100 case EXTRACTOR_METATYPE_ABSTRACT
:
101 case EXTRACTOR_METATYPE_COMMENT
:
102 case EXTRACTOR_METATYPE_DESCRIPTION
:
103 case EXTRACTOR_METATYPE_LYRICS
:
104 case EXTRACTOR_METATYPE_SUMMARY
:
105 send_field(FIELD_BODY
, data
, data_len
);
109 // Ignore other metadata.
// List of libextractor plugins: assigned from EXTRACTOR_plugin_add_defaults()
// and passed to EXTRACTOR_extract() below.
115 static struct EXTRACTOR_PluginList
* plugins
;
120 // Add all default plugins.
121 plugins
= EXTRACTOR_plugin_add_defaults(EXTRACTOR_OPTION_DEFAULT_POLICY
);
122 return plugins
!= nullptr;
126 extract(const string
& filename
, const string
&)
128 // If no plugin is found, the file format isn't recognised, or the file is
129 // corrupt, then no data is extracted rather than an error being reported.
130 EXTRACTOR_extract(plugins
, filename
.c_str(),
132 &process_metadata
, nullptr);