From 2df0344faf438e047df17d045549193efcf59339 Mon Sep 17 00:00:00 2001 From: Olly Betts Date: Mon, 29 Aug 2022 15:05:22 +1200 Subject: [PATCH] Handle <title> in SVG Previously only <dc:title> inside <metadata> was considered. If both are present, <title> now takes preference. (cherry picked from commit c69249f791f0e252456ae422a8c8b2608f60ed70) --- xapian-applications/omega/svgparse.cc | 13 ++++++++--- xapian-applications/omega/svgparse.h | 6 +++-- .../omega/testfiles/svg/diagram.svg | 26 ++++++++++++++++++++++ 3 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 xapian-applications/omega/testfiles/svg/diagram.svg diff --git a/xapian-applications/omega/svgparse.cc b/xapian-applications/omega/svgparse.cc index d611db780..431f8c72c 100644 --- a/xapian-applications/omega/svgparse.cc +++ b/xapian-applications/omega/svgparse.cc @@ -1,7 +1,7 @@ /** @file * @brief Extract text from an SVG file. */ -/* Copyright (C) 2010,2011,2018 Olly Betts +/* Copyright (C) 2010-2022 Olly Betts * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +36,10 @@ SvgParser::process_text(const string &text) case TITLE: target = &title; break; + case DC_TITLE: + // Prefer <title> to <dc:title>. 
+ if (!title.empty()) return; + break; case KEYWORDS: target = &keywords; break; @@ -60,6 +64,8 @@ SvgParser::opening_tag(const string &tag) state = TEXT; else if (tag == "metadata" || tag == "svg:metadata") state = METADATA; + else if (tag == "title") + state = TITLE; break; case METADATA: // Ignore nested "dc:" tags - for example dc:title is also used to @@ -67,14 +73,14 @@ SvgParser::opening_tag(const string &tag) if (dc_tag.empty() && startswith(tag, "dc:")) { dc_tag = tag; if (tag == "dc:title") - state = TITLE; + state = DC_TITLE; else if (tag == "dc:subject") state = KEYWORDS; else if (tag == "dc:creator") state = AUTHOR; } break; - case KEYWORDS: case TEXT: case TITLE: case AUTHOR: + case DC_TITLE: case KEYWORDS: case TEXT: case TITLE: case AUTHOR: // Avoid compiler warnings. break; } @@ -85,6 +91,7 @@ bool SvgParser::closing_tag(const string &tag) { if (tag == "text" || tag == "svg:text" || + tag == "title" || tag == "metadata" || tag == "svg:metadata") { state = OTHER; } else if (tag == dc_tag) { diff --git a/xapian-applications/omega/svgparse.h b/xapian-applications/omega/svgparse.h index f208f1837..9cf499bf4 100644 --- a/xapian-applications/omega/svgparse.h +++ b/xapian-applications/omega/svgparse.h @@ -1,7 +1,7 @@ /** @file * @brief Extract text from an SVG file. 
*/ -/* Copyright (C) 2010,2011 Olly Betts +/* Copyright (C) 2010-2022 Olly Betts * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,7 +24,9 @@ #include "htmlparse.h" class SvgParser : public HtmlParser { - enum { OTHER, TEXT, METADATA, TITLE, KEYWORDS, AUTHOR } state; + enum { + OTHER, TEXT, METADATA, DC_TITLE, TITLE, KEYWORDS, AUTHOR + } state = OTHER; string dc_tag; public: diff --git a/xapian-applications/omega/testfiles/svg/diagram.svg b/xapian-applications/omega/testfiles/svg/diagram.svg new file mode 100644 index 000000000..577798c81 --- /dev/null +++ b/xapian-applications/omega/testfiles/svg/diagram.svg @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<svg version="1.1" baseProfile="full" +xmlns="http://www.w3.org/2000/svg" +xmlns:xlink="http://www.w3.org/1999/xlink" +xmlns:ev="http://www.w3.org/2001/xml-events" +width="120.160mm" height="147.910mm" +viewBox="0 0 120.160 147.910"> +<title>Diagram + + + + + + +Start + + + + + + + + + + + -- 2.11.4.GIT