Add conversion from ISO-639-2/B codes to language names
This commit is contained in:
parent
2725bad686
commit
6e9b39726d
|
@ -47,6 +47,7 @@ set(HEADER_FILES
|
|||
id3/id3v2tag.h
|
||||
ivf/ivfframe.h
|
||||
ivf/ivfstream.h
|
||||
language.h
|
||||
localeawarestring.h
|
||||
margin.h
|
||||
matroska/ebmlelement.h
|
||||
|
@ -121,6 +122,7 @@ set(SRC_FILES
|
|||
id3/id3v2tag.cpp
|
||||
ivf/ivfframe.cpp
|
||||
ivf/ivfstream.cpp
|
||||
language.cpp
|
||||
localeawarestring.cpp
|
||||
matroska/ebmlelement.cpp
|
||||
matroska/matroskaattachment.cpp
|
||||
|
@ -200,3 +202,17 @@ include(LibraryTarget)
|
|||
include(TestTarget)
|
||||
include(Doxygen)
|
||||
include(ConfigHeader)
|
||||
|
||||
# write languages header from CSV file
#
# Each data row of languages.csv has the columns
#   639-1 ,639-2/T ,639-2/B ,Language name ,Native name
# and a name is wrapped in double quotes when it contains commas. The generated
# header maps the ISO-639-2/B code (3rd column) to the language name (4th column).
# Rows which do not match this layout (e.g. the column header row) are skipped.
set(LANGUAGES_HEADER "static const std::unordered_map<std::string, std::string> languages = \{")
file(STRINGS languages.csv LANGUAGE_ROWS ENCODING UTF-8)
foreach (LANGUAGE_ROW ${LANGUAGE_ROWS})
    # the 4th group accepts either a quoted name (which may contain commas) or an
    # unquoted one; matching only up to the first comma would drop all quoted rows
    if (NOT LANGUAGE_ROW MATCHES "([a-z][a-z])[ \t]*,([a-z][a-z][a-z])[ \t]*,([a-z][a-z][a-z])[ \t]*,(\"[^\"]*\"|[^\",]*),")
        continue()
    endif ()
    set(LANGUAGE_ABBREVIATION "${CMAKE_MATCH_3}")
    # remove the optional quotes and the padding around the name
    string(REPLACE "\"" "" LANGUAGE_NAME "${CMAKE_MATCH_4}")
    string(STRIP "${LANGUAGE_NAME}" LANGUAGE_NAME)
    set(LANGUAGES_HEADER "${LANGUAGES_HEADER}\n \{\"${LANGUAGE_ABBREVIATION}\", \"${LANGUAGE_NAME}\"\},")
endforeach ()
set(LANGUAGES_HEADER "${LANGUAGES_HEADER}\n};")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/resources/languages.h" "${LANGUAGES_HEADER}")
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include "./abstracttrack.h"
|
||||
#include "./exceptions.h"
|
||||
#include "./language.h"
|
||||
#include "./mediaformat.h"
|
||||
|
||||
#include "./mp4/mp4ids.h"
|
||||
|
@ -137,8 +138,8 @@ string AbstractTrack::label() const
|
|||
if (!name().empty()) {
|
||||
ss << ", name: \"" << name() << "\"";
|
||||
}
|
||||
if (!language().empty() && language() != "und") {
|
||||
ss << ", language: \"" << language() << "\"";
|
||||
if (isLanguageDefined(language())) {
|
||||
ss << ", language: " << languageNameFromIsoWithFallback(language()) << "";
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
#include "./language.h"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace TagParser {
|
||||
|
||||
/// \cond
// Returns the table used to look up a language name by its language code.
// The table is the function-local static "languages" defined by the included header;
// that is why the include directive is placed inside the function body.
static const auto &languageMapping()
{
#include "resources/languages.h"
    return languages;
}
/// \endcond
|
||||
|
||||
/*!
|
||||
* \brief Returns the language name for the specified ISO-639-2 code (bibliographic, 639-2/B).
|
||||
* \remarks If \a isoCode is unknown an empty string is returned.
|
||||
*/
|
||||
const std::string &languageNameFromIso(const std::string &isoCode)
|
||||
{
|
||||
const auto &mapping = languageMapping();
|
||||
const auto i = mapping.find(isoCode);
|
||||
if (i == mapping.cend()) {
|
||||
static const std::string empty;
|
||||
return empty;
|
||||
}
|
||||
return i->second;
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Returns the language name for the specified ISO-639-2 code (bibliographic, 639-2/B).
|
||||
* \remarks If \a isoCode is unknown the \a isoCode itself is returned.
|
||||
*/
|
||||
const std::string &languageNameFromIsoWithFallback(const std::string &isoCode)
|
||||
{
|
||||
const auto &mapping = languageMapping();
|
||||
const auto i = mapping.find(isoCode);
|
||||
if (i == mapping.cend()) {
|
||||
return isoCode;
|
||||
}
|
||||
return i->second;
|
||||
}
|
||||
|
||||
} // namespace TagParser
|
|
@ -0,0 +1,26 @@
|
|||
#ifndef TAG_PARSER_LANGUAGE_H
|
||||
#define TAG_PARSER_LANGUAGE_H
|
||||
|
||||
#include "./global.h"
|
||||
|
||||
#include <c++utilities/conversion/stringbuilder.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace TagParser {
|
||||
|
||||
/*!
 * \brief Returns whether \a languageSpecification denotes a defined language.
 * \returns Returns false if \a languageSpecification is empty or equals "und"; otherwise true.
 * \remarks The comparison with "und" is exact (case-sensitive); it is not checked whether the
 *          specification is a known language code.
 */
inline bool isLanguageDefined(const std::string &languageSpecification)
{
    return !languageSpecification.empty() && languageSpecification != "und";
}
|
||||
|
||||
TAG_PARSER_EXPORT const std::string &languageNameFromIso(const std::string &isoCode);
|
||||
TAG_PARSER_EXPORT const std::string &languageNameFromIsoWithFallback(const std::string &isoCode);
|
||||
|
||||
} // namespace TagParser
|
||||
|
||||
#endif // TAG_PARSER_LANGUAGE_H
|
|
@ -0,0 +1,186 @@
|
|||
639-1 ,639-2/T ,639-2/B ,Language name ,Native name
|
||||
aa ,aar ,aar ,Afar ,Afaraf
|
||||
ab ,abk ,abk ,Abkhaz ,"аҧсуа бызшәа, аҧсшәа "
|
||||
ae ,ave ,ave ,Avestan ,avesta
|
||||
af ,afr ,afr ,Afrikaans ,Afrikaans
|
||||
ak ,aka ,aka ,Akan ,Akan
|
||||
am ,amh ,amh ,Amharic ,አማርኛ
|
||||
an ,arg ,arg ,Aragonese ,aragonés
|
||||
ar ,ara ,ara ,Arabic ,العربية
|
||||
as ,asm ,asm ,Assamese ,অসমীয়া
|
||||
av ,ava ,ava ,Avaric ,"авар мацӀ, магӀарул мацӀ "
|
||||
ay ,aym ,aym ,Aymara ,aymar aru
|
||||
az ,aze ,aze ,Azerbaijani ,azərbaycan dili
|
||||
az ,azb ,azb ,South Azerbaijani ,تورکجه
|
||||
ba ,bak ,bak ,Bashkir ,башҡорт теле
|
||||
be ,bel ,bel ,Belarusian ,беларуская мова
|
||||
bg ,bul ,bul ,Bulgarian ,български език
|
||||
bh ,bih ,bih ,Bihari ,भोजपुरी
|
||||
bi ,bis ,bis ,Bislama ,Bislama
|
||||
bm ,bam ,bam ,Bambara ,bamanankan
|
||||
bn ,ben ,ben ,Bengali; Bangla ,বাংলা
|
||||
bo ,bod ,tib ,"Tibetan Standard, Tibetan, Central ",བོད་ཡིག
|
||||
br ,bre ,bre ,Breton ,brezhoneg
|
||||
bs ,bos ,bos ,Bosnian ,bosanski jezik
|
||||
ca ,cat ,cat ,Catalan; Valencian ,"català, valencià "
|
||||
ce ,che ,che ,Chechen ,нохчийн мотт
|
||||
ch ,cha ,cha ,Chamorro ,Chamoru
|
||||
co ,cos ,cos ,Corsican ,"corsu, lingua corsa "
|
||||
cr ,cre ,cre ,Cree ,ᓀᐦᐃᔭᐍᐏᐣ
|
||||
cs ,ces ,cze ,Czech ,"čeština, český jazyk "
|
||||
cu ,chu ,chu ,"Old Church Slavonic, Church Slavonic, Old Bulgarian ",ѩзыкъ словѣньскъ
|
||||
cv ,chv ,chv ,Chuvash ,чӑваш чӗлхи
|
||||
cy ,cym ,wel ,Welsh ,Cymraeg
|
||||
da ,dan ,dan ,Danish ,dansk
|
||||
de ,deu ,ger ,German ,Deutsch
|
||||
dv ,div ,div ,Divehi; Dhivehi; Maldivian; ,ދިވެހި
|
||||
dz ,dzo ,dzo ,Dzongkha ,རྫོང་ཁ
|
||||
ee ,ewe ,ewe ,Ewe ,Eʋegbe
|
||||
el ,ell ,gre ,"Greek, Modern ",ελληνικά
|
||||
en ,eng ,eng ,English ,English
|
||||
eo ,epo ,epo ,Esperanto ,Esperanto
|
||||
es ,spa ,spa ,Spanish; Castilian ,"español, castellano "
|
||||
et ,est ,est ,Estonian ,"eesti, eesti keel "
|
||||
eu ,eus ,baq ,Basque ,"euskara, euskera "
|
||||
fa ,fas ,per ,Persian (Farsi) ,فارسی
|
||||
ff ,ful ,ful ,Fula; Fulah; Pulaar; Pular ,"Fulfulde, Pulaar, Pular "
|
||||
fi ,fin ,fin ,Finnish ,"suomi, suomen kieli "
|
||||
fj ,fij ,fij ,Fijian ,vosa Vakaviti
|
||||
fo ,fao ,fao ,Faroese ,føroyskt
|
||||
fr ,fra ,fre ,French ,"français, langue française "
|
||||
fy ,fry ,fry ,Western Frisian ,Frysk
|
||||
ga ,gle ,gle ,Irish ,Gaeilge
|
||||
gd ,gla ,gla ,Scottish Gaelic; Gaelic ,Gàidhlig
|
||||
gl ,glg ,glg ,Galician ,galego
|
||||
gn ,grn ,grn ,Guaraní ,Avañe'ẽ
|
||||
gu ,guj ,guj ,Gujarati ,ગુજરાતી
|
||||
gv ,glv ,glv ,Manx ,"Gaelg, Gailck "
|
||||
ha ,hau ,hau ,Hausa ,"Hausa, هَوُسَ "
|
||||
he ,heb ,heb ,Hebrew (modern) ,עברית
|
||||
hi ,hin ,hin ,Hindi ,"हिन्दी, हिंदी "
|
||||
ho ,hmo ,hmo ,Hiri Motu ,Hiri Motu
|
||||
hr ,hrv ,hrv ,Croatian ,hrvatski jezik
|
||||
ht ,hat ,hat ,Haitian; Haitian Creole ,Kreyòl ayisyen
|
||||
hu ,hun ,hun ,Hungarian ,magyar
|
||||
hy ,hye ,arm ,Armenian ,Հայերեն
|
||||
hz ,her ,her ,Herero ,Otjiherero
|
||||
ia ,ina ,ina ,Interlingua ,Interlingua
|
||||
id ,ind ,ind ,Indonesian ,Bahasa Indonesia
|
||||
ie ,ile ,ile ,Interlingue ,Originally called Occidental; then Interlingue after WWII
|
||||
ig ,ibo ,ibo ,Igbo ,Asụsụ Igbo
|
||||
ii ,iii ,iii ,Nuosu ,ꆈꌠ꒿ Nuosuhxop
|
||||
ik ,ipk ,ipk ,Inupiaq ,"Iñupiaq, Iñupiatun "
|
||||
io ,ido ,ido ,Ido ,Ido
|
||||
is ,isl ,ice ,Icelandic ,Íslenska
|
||||
it ,ita ,ita ,Italian ,italiano
|
||||
iu ,iku ,iku ,Inuktitut ,ᐃᓄᒃᑎᑐᑦ
|
||||
ja ,jpn ,jpn ,Japanese ,日本語 (にほんご)
|
||||
jv ,jav ,jav ,Javanese ,basa Jawa
|
||||
ka ,kat ,geo ,Georgian ,ქართული
|
||||
kg ,kon ,kon ,Kongo ,KiKongo
|
||||
ki ,kik ,kik ,"Kikuyu, Gikuyu ",Gĩkũyũ
|
||||
kj ,kua ,kua ,"Kwanyama, Kuanyama ",Kuanyama
|
||||
kk ,kaz ,kaz ,Kazakh ,қазақ тілі
|
||||
kl ,kal ,kal ,"Kalaallisut, Greenlandic ","kalaallisut, kalaallit oqaasii "
|
||||
km ,khm ,khm ,Khmer ,"ខ្មែរ, ខេមរភាសា, ភាសាខ្មែរ "
|
||||
kn ,kan ,kan ,Kannada ,ಕನ್ನಡ
|
||||
ko ,kor ,kor ,Korean ,"한국어 (韓國語), 조선어 (朝鮮語) "
|
||||
kr ,kau ,kau ,Kanuri ,Kanuri
|
||||
ks ,kas ,kas ,Kashmiri ,"कश्मीरी, كشميري "
|
||||
ku ,kur ,kur ,Kurdish ,"Kurdî, كوردی "
|
||||
kv ,kom ,kom ,Komi ,коми кыв
|
||||
kw ,cor ,cor ,Cornish ,Kernewek
|
||||
ky ,kir ,kir ,Kyrgyz ,"Кыргызча, Кыргыз тили "
|
||||
la ,lat ,lat ,Latin ,"latine, lingua latina "
|
||||
lb ,ltz ,ltz ,"Luxembourgish, Letzeburgesch ",Lëtzebuergesch
|
||||
lg ,lug ,lug ,Ganda ,Luganda
|
||||
li ,lim ,lim ,"Limburgish, Limburgan, Limburger ",Limburgs
|
||||
ln ,lin ,lin ,Lingala ,Lingála
|
||||
lo ,lao ,lao ,Lao ,ພາສາລາວ
|
||||
lt ,lit ,lit ,Lithuanian ,lietuvių kalba
|
||||
lu ,lub ,lub ,Luba-Katanga ,Tshiluba
|
||||
lv ,lav ,lav ,Latvian ,latviešu valoda
|
||||
mg ,mlg ,mlg ,Malagasy ,fiteny malagasy
|
||||
mh ,mah ,mah ,Marshallese ,Kajin M̧ajeļ
|
||||
mi ,mri ,mao ,Māori ,te reo Māori
|
||||
mk ,mkd ,mac ,Macedonian ,македонски јазик
|
||||
ml ,mal ,mal ,Malayalam ,മലയാളം
|
||||
mn ,mon ,mon ,Mongolian ,монгол
|
||||
mr ,mar ,mar ,Marathi (Marāṭhī) ,मराठी
|
||||
ms ,msa ,may ,Malay ,"bahasa Melayu, بهاس ملايو "
|
||||
mt ,mlt ,mlt ,Maltese ,Malti
|
||||
my ,mya ,bur ,Burmese ,ဗမာစာ
|
||||
na ,nau ,nau ,Nauru ,Ekakairũ Naoero
|
||||
nb ,nob ,nob ,Norwegian Bokmål ,Norsk bokmål
|
||||
nd ,nde ,nde ,North Ndebele ,isiNdebele
|
||||
ne ,nep ,nep ,Nepali ,नेपाली
|
||||
ng ,ndo ,ndo ,Ndonga ,Owambo
|
||||
nl ,nld ,dut ,Dutch ,"Nederlands, Vlaams "
|
||||
nn ,nno ,nno ,Norwegian Nynorsk ,Norsk nynorsk
|
||||
no ,nor ,nor ,Norwegian ,Norsk
|
||||
nr ,nbl ,nbl ,South Ndebele ,isiNdebele
|
||||
nv ,nav ,nav ,"Navajo, Navaho ","Diné bizaad, Dinékʼehǰí "
|
||||
ny ,nya ,nya ,Chichewa; Chewa; Nyanja ,"chiCheŵa, chinyanja "
|
||||
oc ,oci ,oci ,Occitan ,"occitan, lenga d'òc "
|
||||
oj ,oji ,oji ,"Ojibwe, Ojibwa ",ᐊᓂᔑᓈᐯᒧᐎᓐ
|
||||
om ,orm ,orm ,Oromo ,Afaan Oromoo
|
||||
or ,ori ,ori ,Oriya ,ଓଡ଼ିଆ
|
||||
os ,oss ,oss ,"Ossetian, Ossetic ",ирон æвзаг
|
||||
pa ,pan ,pan ,"Panjabi, Punjabi ","ਪੰਜਾਬੀ, پنجابی "
|
||||
pi ,pli ,pli ,Pāli ,पाऴि
|
||||
pl ,pol ,pol ,Polish ,"język polski, polszczyzna "
|
||||
ps ,pus ,pus ,"Pashto, Pushto ",پښتو
|
||||
pt ,por ,por ,Portuguese ,português
|
||||
qu ,que ,que ,Quechua ,"Runa Simi, Kichwa "
|
||||
rm ,roh ,roh ,Romansh ,rumantsch grischun
|
||||
rn ,run ,run ,Kirundi ,Ikirundi
|
||||
ro ,ron ,rum ,Romanian ,limba română
|
||||
ru ,rus ,rus ,Russian ,русский язык
|
||||
rw ,kin ,kin ,Kinyarwanda ,Ikinyarwanda
|
||||
sa ,san ,san ,Sanskrit (Saṁskṛta) ,संस्कृतम्
|
||||
sc ,srd ,srd ,Sardinian ,sardu
|
||||
sd ,snd ,snd ,Sindhi ,"सिन्धी, سنڌي، سندھی "
|
||||
se ,sme ,sme ,Northern Sami ,Davvisámegiella
|
||||
sg ,sag ,sag ,Sango ,yângâ tî sängö
|
||||
si ,sin ,sin ,"Sinhala, Sinhalese ",සිංහල
|
||||
sk ,slk ,slo ,Slovak ,"slovenčina, slovenský jazyk "
|
||||
sl ,slv ,slv ,Slovene ,"slovenski jezik, slovenščina "
|
||||
sm ,smo ,smo ,Samoan ,gagana fa'a Samoa
|
||||
sn ,sna ,sna ,Shona ,chiShona
|
||||
so ,som ,som ,Somali ,"Soomaaliga, af Soomaali "
|
||||
sq ,sqi ,alb ,Albanian ,gjuha shqipe
|
||||
sr ,srp ,srp ,Serbian ,српски језик
|
||||
ss ,ssw ,ssw ,Swati ,SiSwati
|
||||
st ,sot ,sot ,Southern Sotho ,Sesotho
|
||||
su ,sun ,sun ,Sundanese ,Basa Sunda
|
||||
sv ,swe ,swe ,Swedish ,Svenska
|
||||
sw ,swa ,swa ,Swahili ,Kiswahili
|
||||
ta ,tam ,tam ,Tamil ,தமிழ்
|
||||
te ,tel ,tel ,Telugu ,తెలుగు
|
||||
tg ,tgk ,tgk ,Tajik ,"тоҷикӣ, toğikī, تاجیکی "
|
||||
th ,tha ,tha ,Thai ,ไทย
|
||||
ti ,tir ,tir ,Tigrinya ,ትግርኛ
|
||||
tk ,tuk ,tuk ,Turkmen ,"Türkmen, Түркмен "
|
||||
tl ,tgl ,tgl ,Tagalog ,"Wikang Tagalog, ᜏᜒᜃᜅ᜔ ᜆᜄᜎᜓᜄ᜔ "
|
||||
tn ,tsn ,tsn ,Tswana ,Setswana
|
||||
to ,ton ,ton ,Tonga (Tonga Islands) ,faka Tonga
|
||||
tr ,tur ,tur ,Turkish ,Türkçe
|
||||
ts ,tso ,tso ,Tsonga ,Xitsonga
|
||||
tt ,tat ,tat ,Tatar ,"татар теле, tatar tele "
|
||||
tw ,twi ,twi ,Twi ,Twi
|
||||
ty ,tah ,tah ,Tahitian ,Reo Tahiti
|
||||
ug ,uig ,uig ,"Uyghur, Uighur ","Uyƣurqə, ئۇيغۇرچە "
|
||||
uk ,ukr ,ukr ,Ukrainian ,українська мова
|
||||
ur ,urd ,urd ,Urdu ,اردو
|
||||
uz ,uzb ,uzb ,Uzbek ,"O‘zbek, Ўзбек, أۇزبېك "
|
||||
ve ,ven ,ven ,Venda ,Tshivenḓa
|
||||
vi ,vie ,vie ,Vietnamese ,Tiếng Việt
|
||||
vo ,vol ,vol ,Volapük ,Volapük
|
||||
wa ,wln ,wln ,Walloon ,walon
|
||||
wo ,wol ,wol ,Wolof ,Wollof
|
||||
xh ,xho ,xho ,Xhosa ,isiXhosa
|
||||
yi ,yid ,yid ,Yiddish ,ייִדיש
|
||||
yo ,yor ,yor ,Yoruba ,Yorùbá
|
||||
za ,zha ,zha ,"Zhuang, Chuang ","Saɯ cueŋƅ, Saw cuengh "
|
||||
zh ,zho ,chi ,Chinese ,"中文 (Zhōngwén), 汉语, 漢語 "
|
||||
zu ,zul ,zul ,Zulu ,isiZulu
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "./backuphelper.h"
|
||||
#include "./diagnostics.h"
|
||||
#include "./exceptions.h"
|
||||
#include "./language.h"
|
||||
#include "./progressfeedback.h"
|
||||
#include "./signature.h"
|
||||
#include "./tag.h"
|
||||
|
@ -902,12 +903,11 @@ unordered_set<string> MediaFileInfo::availableLanguages(MediaType type) const
|
|||
if (m_container) {
|
||||
for (size_t i = 0, count = m_container->trackCount(); i != count; ++i) {
|
||||
const AbstractTrack *track = m_container->track(i);
|
||||
if ((type == MediaType::Unknown || track->mediaType() == type) && !track->language().empty() && track->language() != "und") {
|
||||
if ((type == MediaType::Unknown || track->mediaType() == type) && isLanguageDefined(track->language())) {
|
||||
res.emplace(track->language());
|
||||
}
|
||||
}
|
||||
} else if (m_singleTrack && (type == MediaType::Unknown || m_singleTrack->mediaType() == type) && !m_singleTrack->language().empty()
|
||||
&& m_singleTrack->language() != "und") {
|
||||
} else if (m_singleTrack && (type == MediaType::Unknown || m_singleTrack->mediaType() == type) && isLanguageDefined(m_singleTrack->language())) {
|
||||
res.emplace(m_singleTrack->language());
|
||||
}
|
||||
return res;
|
||||
|
|
|
@ -186,6 +186,6 @@ void MediaFileInfoTests::testFullParseAndFurtherProperties()
|
|||
CPPUNIT_ASSERT_EQUAL(unordered_set<string>({ "eng" }), file.availableLanguages());
|
||||
CPPUNIT_ASSERT_EQUAL(unordered_set<string>({}), file.availableLanguages(MediaType::Text));
|
||||
CPPUNIT_ASSERT_EQUAL("ID: 2422994868, type: Video"s, file.tracks()[0]->label());
|
||||
CPPUNIT_ASSERT_EQUAL("ID: 3653291187, type: Audio, language: \"eng\""s, file.tracks()[1]->label());
|
||||
CPPUNIT_ASSERT_EQUAL("ID: 3653291187, type: Audio, language: English"s, file.tracks()[1]->label());
|
||||
CPPUNIT_ASSERT_EQUAL("MS-MPEG-4-480p / MP3-2ch-eng"s, file.technicalSummary());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue