Delete all French stopwords
Clash Royale CLAN TAG#URR8PPP
up vote
2
down vote
favorite
I have a list of French stopwords:
frenchStopWords = {"alors", "au", "aucuns", "aussi", "autre", "avant", "avec", "avoir",
"bon", "car", "ce", "cela", "ces", "ceux", "chaque", "ci", "comme",
"comment", "dans", "des", "du", "dedans", "dehors", "depuis",
"devrait", "doit", "donc", "dos", "début", "elle", "elles", "en",
"encore", "essai", "est", "et", "eu", "fait", "faites", "fois",
"font", "hors", "ici", "il", "ils", "je", "juste", "la", "le", "les",
"leur", "là", "ma", "maintenant", "mais", "mes", "mine", "moins",
"mon", "mot", "même", "ni", "nommés", "notre", "nous", "ou", "où",
"par", "parce", "pas", "peut", "peu", "plupart", "pour", "pourquoi",
"quand", "que", "quel", "quelle", "quelles", "quels", "qui", "sa",
"sans", "ses", "seulement", "si", "sien", "son", "sont", "sous",
"soyez", "sujet", "sur", "ta", "tandis", "tellement", "tels", "tes",
"ton", "tous", "tout", "trop", "très", "tu", "voient", "vont",
"votre", "vous", "vu", "ça", "étaient", "état", "étions", "été",
"être"};
And some French text here:
text = {"et", "bien", "bonjour,", "vous", "avez", "déjà", "suivi,",
"peut-être", "le", "cours", "electronique", "et,", "voilà,", "ça",
"c\[CloseCurlyQuote]est", "le", "cours", "electronique", "ii,",
"c\[CloseCurlyQuote]est", "une", "suite", "logique", "du", "premier",
"cours,", "dans", "le", "premier", "cours", "dans",
"l\[CloseCurlyQuote]électronique", ",", "étudié,", "tout", "ce", "qui",
"était", "lié", "aux", "fonctions,", "électroniques", "basées",
"sur", "les", "amplificateurs,", "opérationnels.", "dans", "ce",
"cours", "là,,", "va", "aborder", "le", "transistor", "bipolaire,",
"et", "les", "fonctions", "analogiques", "de", "base.,", "donc",
"va", "partir", "avec", "toutes,", "les", "fonctions", "de", "base",
"depuis", "l\[CloseCurlyQuote]analyse,", "du", "transistor",
"jusqu\[CloseCurlyQuote]à", "ce", "qu\[CloseCurlyQuote],", "arrive",
"avec", "des", "fonctions", "un", "peu,", "plus", "complexes,", "du",
"style", "analyser", "un,", "régulateur", "série", "et", "terminer",
"avec,", "l\[CloseCurlyQuote]analyse", "des", "circuits", "tels",
"que,", "les", "amplificateurs", "de", "puissance,", "et", "les",
"amplificateurs", "audio.", "donc", "si", "regarde", "ce", "cours,,",
"va", "se", "rendre", "compte", "que", "la", "suite,",
"d\[CloseCurlyQuote]electronique", "c\[CloseCurlyQuote]est",
"electronique", "ii", ".,", "je", "vais", "aller", "dans", "ce",
"cours,", "et", "voir", "ce", "qui", "va", "se", "passer.,", "je",
"vais", "aller", "là-dedans,", "et", "voir", "comment",
"c\[CloseCurlyQuote]est", "structuré", "ce", "cours.,", "donc,",
"exactement", "de", "la", "même", "manière", "que", "electronique",
",", "va", "trouver,", "les", "semaines.,", "il", "y",
"l\[CloseCurlyQuote]équivalent", "de", "8", "semaines,",
"d\[CloseCurlyQuote]études,", "dont", "deux", "examens.,", "un",
"examen"};
Now I want to delete all stopwords. I tried:
DeleteCases[text,#]&/@frenchStopWords
...but this is wrong. I know that I need to use an "OR" operator, but I don't know how to implement it without running a for loop. Thanks.
string-manipulation natural-language
add a comment |
up vote
2
down vote
favorite
I have a list of french stopwords:
frenchStopWords = "alors", "au", "aucuns", "aussi", "autre", "avant", "avec", "avoir",
"bon", "car", "ce", "cela", "ces", "ceux", "chaque", "ci", "comme",
"comment", "dans", "des", "du", "dedans", "dehors", "depuis",
"devrait", "doit", "donc", "dos", "début", "elle", "elles", "en",
"encore", "essai", "est", "et", "eu", "fait", "faites", "fois",
"font", "hors", "ici", "il", "ils", "je", "juste", "la", "le", "les",
"leur", "lÃÂ ", "ma", "maintenant", "mais", "mes", "mine", "moins",
"mon", "mot", "même", "ni", "nommés", "notre", "nous", "ou", "où",
"par", "parce", "pas", "peut", "peu", "plupart", "pour", "pourquoi",
"quand", "que", "quel", "quelle", "quelles", "quels", "qui", "sa",
"sans", "ses", "seulement", "si", "sien", "son", "sont", "sous",
"soyez", "sujet", "sur", "ta", "tandis", "tellement", "tels", "tes",
"ton", "tous", "tout", "trop", "très", "tu", "voient", "vont",
"votre", "vous", "vu", "ça", "étaient", "état", "étions", "été",
"être";
And some french text here:
text = "et", "bien", "bonjour,", "vous", "avez", "déjà", "suivi,",
"peut-être", "le", "cours", "electronique", "et,", "voilà,", "ça", "c
[CloseCurlyQuote]est", "le", "cours", "electronique", "ii,", "c
[CloseCurlyQuote]est", "une", "suite", "logique", "du", "premier",
"cours,", "dans", "le", "premier", "cours", "dans", "l
[CloseCurlyQuote]électronique", ",", "étudié,", "tout", "ce", "qui",
"était", "lié", "aux", "fonctions,", "électroniques", "basées",
"sur", "les", "amplificateurs,", "opérationnels.", "dans", "ce",
"cours", "lÃÂ ,,", "va", "aborder", "le", "transistor", "bipolaire,",
"et", "les", "fonctions", "analogiques", "de", "base.,", "donc",
"va", "partir", "avec", "toutes,", "les", "fonctions", "de", "base",
"depuis", "l[CloseCurlyQuote]analyse,", "du", "transistor", "jusqu
[CloseCurlyQuote]ÃÂ ", "ce", "qu[CloseCurlyQuote],", "arrive",
"avec", "des", "fonctions", "un", "peu,", "plus", "complexes,", "du",
"style", "analyser", "un,", "régulateur", "série", "et", "terminer",
"avec,", "l[CloseCurlyQuote]analyse", "des", "circuits", "tels",
"que,", "les", "amplificateurs", "de", "puissance,", "et", "les",
"amplificateurs", "audio.", "donc", "si", "regarde", "ce", "cours,,",
"va", "se", "rendre", "compte", "que", "la", "suite,", "d
[CloseCurlyQuote]electronique", "c[CloseCurlyQuote]est",
"electronique", "ii", ".,", "je", "vais", "aller", "dans", "ce",
"cours,", "et", "voir", "ce", "qui", "va", "se", "passer.,", "je",
"vais", "aller", "lÃÂ -dedans,", "et", "voir", "comment", "c
[CloseCurlyQuote]est", "structuré", "ce", "cours.,", "donc,",
"exactement", "de", "la", "même", "manière", "que", "electronique",
",", "va", "trouver,", "les", "semaines.,", "il", "y", "l
[CloseCurlyQuote]équivalent", "de", "8", "semaines,", "d
[CloseCurlyQuote]études,", "dont", "deux", "examens.,", "un",
"examen";
Now I want to delete all stopwords. I tried:
DeleteCases[text,#]&/@frenchStopWords
...but this is wrong. I know that I need to use an "OR" operator, but I don't know how to implement it without running a for loop. Thanks.
string-manipulation natural-language
1
Complement[text, frenchStopWords]
– Carl Lange
4 hours ago
1
@james: As well as the nice comment, see "New Proposal" at mathematica.stackexchange.com/questions/18100/…. Too bad reference.wolfram.com/language/ref/DeleteStopwords.html does not have a language option.
– Moo
3 hours ago
add a comment |
up vote
2
down vote
favorite
up vote
2
down vote
favorite
I have a list of french stopwords:
frenchStopWords = "alors", "au", "aucuns", "aussi", "autre", "avant", "avec", "avoir",
"bon", "car", "ce", "cela", "ces", "ceux", "chaque", "ci", "comme",
"comment", "dans", "des", "du", "dedans", "dehors", "depuis",
"devrait", "doit", "donc", "dos", "début", "elle", "elles", "en",
"encore", "essai", "est", "et", "eu", "fait", "faites", "fois",
"font", "hors", "ici", "il", "ils", "je", "juste", "la", "le", "les",
"leur", "lÃÂ ", "ma", "maintenant", "mais", "mes", "mine", "moins",
"mon", "mot", "même", "ni", "nommés", "notre", "nous", "ou", "où",
"par", "parce", "pas", "peut", "peu", "plupart", "pour", "pourquoi",
"quand", "que", "quel", "quelle", "quelles", "quels", "qui", "sa",
"sans", "ses", "seulement", "si", "sien", "son", "sont", "sous",
"soyez", "sujet", "sur", "ta", "tandis", "tellement", "tels", "tes",
"ton", "tous", "tout", "trop", "très", "tu", "voient", "vont",
"votre", "vous", "vu", "ça", "étaient", "état", "étions", "été",
"être";
And some french text here:
text = "et", "bien", "bonjour,", "vous", "avez", "déjà", "suivi,",
"peut-être", "le", "cours", "electronique", "et,", "voilà,", "ça", "c
[CloseCurlyQuote]est", "le", "cours", "electronique", "ii,", "c
[CloseCurlyQuote]est", "une", "suite", "logique", "du", "premier",
"cours,", "dans", "le", "premier", "cours", "dans", "l
[CloseCurlyQuote]électronique", ",", "étudié,", "tout", "ce", "qui",
"était", "lié", "aux", "fonctions,", "électroniques", "basées",
"sur", "les", "amplificateurs,", "opérationnels.", "dans", "ce",
"cours", "lÃÂ ,,", "va", "aborder", "le", "transistor", "bipolaire,",
"et", "les", "fonctions", "analogiques", "de", "base.,", "donc",
"va", "partir", "avec", "toutes,", "les", "fonctions", "de", "base",
"depuis", "l[CloseCurlyQuote]analyse,", "du", "transistor", "jusqu
[CloseCurlyQuote]ÃÂ ", "ce", "qu[CloseCurlyQuote],", "arrive",
"avec", "des", "fonctions", "un", "peu,", "plus", "complexes,", "du",
"style", "analyser", "un,", "régulateur", "série", "et", "terminer",
"avec,", "l[CloseCurlyQuote]analyse", "des", "circuits", "tels",
"que,", "les", "amplificateurs", "de", "puissance,", "et", "les",
"amplificateurs", "audio.", "donc", "si", "regarde", "ce", "cours,,",
"va", "se", "rendre", "compte", "que", "la", "suite,", "d
[CloseCurlyQuote]electronique", "c[CloseCurlyQuote]est",
"electronique", "ii", ".,", "je", "vais", "aller", "dans", "ce",
"cours,", "et", "voir", "ce", "qui", "va", "se", "passer.,", "je",
"vais", "aller", "lÃÂ -dedans,", "et", "voir", "comment", "c
[CloseCurlyQuote]est", "structuré", "ce", "cours.,", "donc,",
"exactement", "de", "la", "même", "manière", "que", "electronique",
",", "va", "trouver,", "les", "semaines.,", "il", "y", "l
[CloseCurlyQuote]équivalent", "de", "8", "semaines,", "d
[CloseCurlyQuote]études,", "dont", "deux", "examens.,", "un",
"examen";
Now I want to delete all stopwords. I tried:
DeleteCases[text,#]&/@frenchStopWords
...but this is wrong. I know that I need to use an "OR" operator, but I don't know how to implement it without running a for loop. Thanks.
string-manipulation natural-language
I have a list of french stopwords:
frenchStopWords = "alors", "au", "aucuns", "aussi", "autre", "avant", "avec", "avoir",
"bon", "car", "ce", "cela", "ces", "ceux", "chaque", "ci", "comme",
"comment", "dans", "des", "du", "dedans", "dehors", "depuis",
"devrait", "doit", "donc", "dos", "début", "elle", "elles", "en",
"encore", "essai", "est", "et", "eu", "fait", "faites", "fois",
"font", "hors", "ici", "il", "ils", "je", "juste", "la", "le", "les",
"leur", "lÃÂ ", "ma", "maintenant", "mais", "mes", "mine", "moins",
"mon", "mot", "même", "ni", "nommés", "notre", "nous", "ou", "où",
"par", "parce", "pas", "peut", "peu", "plupart", "pour", "pourquoi",
"quand", "que", "quel", "quelle", "quelles", "quels", "qui", "sa",
"sans", "ses", "seulement", "si", "sien", "son", "sont", "sous",
"soyez", "sujet", "sur", "ta", "tandis", "tellement", "tels", "tes",
"ton", "tous", "tout", "trop", "très", "tu", "voient", "vont",
"votre", "vous", "vu", "ça", "étaient", "état", "étions", "été",
"être";
And some french text here:
text = "et", "bien", "bonjour,", "vous", "avez", "déjà", "suivi,",
"peut-être", "le", "cours", "electronique", "et,", "voilà,", "ça", "c
[CloseCurlyQuote]est", "le", "cours", "electronique", "ii,", "c
[CloseCurlyQuote]est", "une", "suite", "logique", "du", "premier",
"cours,", "dans", "le", "premier", "cours", "dans", "l
[CloseCurlyQuote]électronique", ",", "étudié,", "tout", "ce", "qui",
"était", "lié", "aux", "fonctions,", "électroniques", "basées",
"sur", "les", "amplificateurs,", "opérationnels.", "dans", "ce",
"cours", "lÃÂ ,,", "va", "aborder", "le", "transistor", "bipolaire,",
"et", "les", "fonctions", "analogiques", "de", "base.,", "donc",
"va", "partir", "avec", "toutes,", "les", "fonctions", "de", "base",
"depuis", "l[CloseCurlyQuote]analyse,", "du", "transistor", "jusqu
[CloseCurlyQuote]ÃÂ ", "ce", "qu[CloseCurlyQuote],", "arrive",
"avec", "des", "fonctions", "un", "peu,", "plus", "complexes,", "du",
"style", "analyser", "un,", "régulateur", "série", "et", "terminer",
"avec,", "l[CloseCurlyQuote]analyse", "des", "circuits", "tels",
"que,", "les", "amplificateurs", "de", "puissance,", "et", "les",
"amplificateurs", "audio.", "donc", "si", "regarde", "ce", "cours,,",
"va", "se", "rendre", "compte", "que", "la", "suite,", "d
[CloseCurlyQuote]electronique", "c[CloseCurlyQuote]est",
"electronique", "ii", ".,", "je", "vais", "aller", "dans", "ce",
"cours,", "et", "voir", "ce", "qui", "va", "se", "passer.,", "je",
"vais", "aller", "lÃÂ -dedans,", "et", "voir", "comment", "c
[CloseCurlyQuote]est", "structuré", "ce", "cours.,", "donc,",
"exactement", "de", "la", "même", "manière", "que", "electronique",
",", "va", "trouver,", "les", "semaines.,", "il", "y", "l
[CloseCurlyQuote]équivalent", "de", "8", "semaines,", "d
[CloseCurlyQuote]études,", "dont", "deux", "examens.,", "un",
"examen";
Now I want to delete all stopwords. I tried:
DeleteCases[text,#]&/@frenchStopWords
...but this is wrong. I know that I need to use an "OR" operator, but I don't know how to implement it without running a for loop. Thanks.
string-manipulation natural-language
string-manipulation natural-language
edited 3 hours ago
gwr
7,01622457
7,01622457
asked 4 hours ago
james
664418
664418
1
Complement[text, frenchStopWords]
â Carl Lange
4 hours ago
1
@james: As well as the nice comment, see "New Proposal" at mathematica.stackexchange.com/questions/18100/…. Too bad reference.wolfram.com/language/ref/DeleteStopwords.html does not have a language option.
â Moo
3 hours ago
add a comment |Â
1
Complement[text, frenchStopWords]
â Carl Lange
4 hours ago
1
@james: As well as the nice Comment, see "New Proposal" at mathematica.stackexchange.com/questions/18100/â¦. Too bad reference.wolfram.com/language/ref/DeleteStopwords.html does not have language Options.
â Moo
3 hours ago
1
1
Complement[text, frenchStopWords]
â Carl Lange
4 hours ago
Complement[text, frenchStopWords]
â Carl Lange
4 hours ago
1
1
@james: As well as the nice Comment, see "New Proposal" at mathematica.stackexchange.com/questions/18100/â¦. Too bad reference.wolfram.com/language/ref/DeleteStopwords.html does not have language Options.
â Moo
3 hours ago
@james: As well as the nice Comment, see "New Proposal" at mathematica.stackexchange.com/questions/18100/â¦. Too bad reference.wolfram.com/language/ref/DeleteStopwords.html does not have language Options.
â Moo
3 hours ago
add a comment |Â
1 Answer
1
active
oldest
votes
up vote
2
down vote
I think there is an important lesson to learn here, one that many people overlook:
DeleteCases[text, Alternatives@@frenchStopWords]
While Complement in the comments seems equivalent, it does not accept patterns, whereas DeleteCases does, which accounts for more general cases. This is why it is useful to remember the use of Alternatives in DeleteCases.
add a comment |Â
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
2
down vote
I think there is an important lesson to learn here, one that many people overlook:
DeleteCases[text, Alternatives@@frenchStopWords]
While Complement in the comments seems equivalent, it does not accept patterns, whereas DeleteCases does, which accounts for more general cases. This is why it is useful to remember the use of Alternatives in DeleteCases.
add a comment |Â
up vote
2
down vote
I think there is an important lesson to learn here, one that many people overlook:
DeleteCases[text, Alternatives@@frenchStopWords]
While Complement in the comments seems equivalent, it does not accept patterns, whereas DeleteCases does, which accounts for more general cases. This is why it is useful to remember the use of Alternatives in DeleteCases.
add a comment |Â
up vote
2
down vote
up vote
2
down vote
I think there is an important lesson to learn here, one that many people overlook:
DeleteCases[text, Alternatives@@frenchStopWords]
While Complement in the comments seems equivalent, it does not accept patterns, whereas DeleteCases does, which accounts for more general cases. This is why it is useful to remember the use of Alternatives in DeleteCases.
I think there is an important lesson to learn here, one that many people overlook:
DeleteCases[text, Alternatives@@frenchStopWords]
While Complement in the comments seems equivalent, it does not accept patterns, whereas DeleteCases does, which accounts for more general cases. This is why it is useful to remember the use of Alternatives in DeleteCases.
edited 23 mins ago
answered 31 mins ago
Vitaliy Kaurov
56.4k6158275
56.4k6158275
add a comment |Â
add a comment |Â
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fmathematica.stackexchange.com%2fquestions%2f184750%2fdelete-all-french-stopwords%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
1
Complement[text, frenchStopWords]
â Carl Lange
4 hours ago
1
@james: As well as the nice Comment, see "New Proposal" at mathematica.stackexchange.com/questions/18100/â¦. Too bad reference.wolfram.com/language/ref/DeleteStopwords.html does not have language Options.
â Moo
3 hours ago