/******************************************************************************
 * Program   : Rule Extraction for Binary Targets with BOOLRULE
 * Reference : RULEEXB1B0
 * Source    : https://www.wearecas.eu/en/sampleCode/RULEEXB1B0
 ******************************************************************************/

/* --- BLOCK 1: basic example --- */

/* Start a CAS session and assign the libref 'mycas' through the CAS engine. */
cas;
libname mycas cas;

/* Build the review table in CAS.
   Fields are '|'-delimited: text | positive (binary target) | category | did (document id).
   Each record must stay on its own line and the closing ';' must be alone on
   its line, otherwise the in-stream data is not read correctly. */
data mycas.reviews;
   infile datalines delimiter='|' missover;
   length text $300 category $20;
   input text$ positive category$ did;
   datalines;
This is the greatest phone ever! love it!|1|electronics|1
The phone's battery life is too short and screen resolution is low.|0|electronics|2
The screen resolution is low, but I love this tv.|1|electronics|3
The movie itself is great and I like it, although the resolution is low.|1|movies|4
The movie's story is boring and the acting is poor.|0|movies|5
I watched this movie on tv, it's not good on a small screen. |0|movies|6
watched the movie first and loved it, the book is even better!|1|books |7
I like the story in this book, they should put it on screen.|1|books|8
I love the author, but this book is a waste of time, don't buy it.|0|books|9
;
run;

/* Text preprocessing: parse the 'text' column and write the term-by-document
   table (reviews_bow) and the term table (reviews_terms). */
proc textmine data=mycas.reviews;
   doc_id did;
   var text;
   parse
      nonoungroups
      notagging
      entities  = none
      outparent = mycas.reviews_bow
      outterms  = mycas.reviews_terms
      reducef   = 1;
run;

/* Rule extraction for the binary target 'positive'.
   minsupports / mpos / gpos are all set to 1; with only nine documents,
   larger thresholds would presumably leave too few candidate terms. */
proc boolrule
   data        = mycas.reviews_bow
   docid       = _document_
   termid      = _termnum_
   docinfo     = mycas.reviews
   terminfo    = mycas.reviews_terms
   minsupports = 1
   mpos        = 1
   gpos        = 1;
   docinfo
      id      = did
      targets = (positive);
   terminfo
      id    = key
      label = term;
   output
      ruleterms = mycas.ruleterms_basic
      rules     = mycas.rules_basic;
run;

/* Copy the rules to WORK and print them. */
data rules_basic;
   set mycas.rules_basic;
run;

proc print data=rules_basic;
   title 'Exemple Basique: Règles pour sentiment positif';
   var target ruleid rule F1 precision recall;
run;

/* Cleanup.
   The tables are deleted through the same libref that created them.
   'mycas' is a libref, not a caslib name, so it cannot be passed to
   PROC CASUTIL's INCASLIB= option. */
proc delete
   data = mycas.reviews
          mycas.reviews_bow
          mycas.reviews_terms
          mycas.ruleterms_basic
          mycas.rules_basic;
run;


/* --- BLOCK 2: intermediate example with additional options --- */

/* Start a CAS session and assign the libref 'mycas' through the CAS engine. */
cas;
libname mycas cas;

/* Build the review table in CAS (same nine records as the basic example). */
data mycas.reviews;
   infile datalines delimiter='|' missover;
   length text $300 category $20;
   input text$ positive category$ did;
   datalines;
This is the greatest phone ever! love it!|1|electronics|1
The phone's battery life is too short and screen resolution is low.|0|electronics|2
The screen resolution is low, but I love this tv.|1|electronics|3
The movie itself is great and I like it, although the resolution is low.|1|movies|4
The movie's story is boring and the acting is poor.|0|movies|5
I watched this movie on tv, it's not good on a small screen. |0|movies|6
watched the movie first and loved it, the book is even better!|1|books |7
I like the story in this book, they should put it on screen.|1|books|8
I love the author, but this book is a waste of time, don't buy it.|0|books|9
;
run;

/* Text preprocessing with PROC TEXTMINE. */
proc textmine data=mycas.reviews;
   doc_id did;
   var text;
   parse
      nonoungroups
      notagging
      entities  = none
      outparent = mycas.reviews_bow
      outterms  = mycas.reviews_terms
      reducef   = 1;
run;

/* Rule extraction with additional tuning options.
   Only options documented for the PROC BOOLRULE statement are used:
   rule-length limits, a "no cutoff" switch and a thread count are not part
   of its syntax and would stop the step with a syntax error.
   NOTE(review): the values below are illustrative for this tiny corpus;
   confirm them against the PROC BOOLRULE documentation before reuse. */
proc boolrule
   data          = mycas.reviews_bow
   docid         = _document_
   termid        = _termnum_
   docinfo       = mycas.reviews
   terminfo      = mycas.reviews_terms
   minsupports   = 1
   mpos          = 1
   gpos          = 1
   mneg          = 1    /* m value applied to negative (negated) terms          */
   gneg          = 1    /* minimum g-score for a negative term to be considered */
   maxcandidates = 50   /* cap on candidate terms selected per category         */
   maxtriesin    = 50   /* k-best search width when combining terms into a rule */
   maxtriesout   = 20;  /* k-best search width when combining rules             */
   docinfo
      id      = did
      targets = (positive);
   terminfo
      id    = key
      label = term;
   output
      ruleterms = mycas.ruleterms_intermediaire
      rules     = mycas.rules_intermediaire;
run;

/* Copy the rules to WORK and print them. */
data rules_intermediaire;
   set mycas.rules_intermediaire;
run;

proc print data=rules_intermediaire;
   title 'Exemple Intermédiaire: Règles avec options courantes';
   var target ruleid rule F1 precision recall;
run;

/* Cleanup through the libref (see the note in the basic example's cleanup:
   'mycas' is a libref, not a caslib). */
proc delete
   data = mycas.reviews
          mycas.reviews_bow
          mycas.reviews_terms
          mycas.ruleterms_intermediaire
          mycas.rules_intermediaire;
run;
/* --- BLOCK 3: advanced example on a filtered subset --- */

/* Start a CAS session and assign the libref 'mycas' through the CAS engine. */
cas;
libname mycas cas;

/* Build the review table in CAS.
   Fields are '|'-delimited: text | positive (binary target) | category | did (document id).
   Each record must stay on its own line and the closing ';' must be alone on
   its line, otherwise the in-stream data is not read correctly. */
data mycas.reviews;
   infile datalines delimiter='|' missover;
   length text $300 category $20;
   input text$ positive category$ did;
   datalines;
This is the greatest phone ever! love it!|1|electronics|1
The phone's battery life is too short and screen resolution is low.|0|electronics|2
The screen resolution is low, but I love this tv.|1|electronics|3
The movie itself is great and I like it, although the resolution is low.|1|movies|4
The movie's story is boring and the acting is poor.|0|movies|5
I watched this movie on tv, it's not good on a small screen. |0|movies|6
watched the movie first and loved it, the book is even better!|1|books |7
I like the story in this book, they should put it on screen.|1|books|8
I love the author, but this book is a waste of time, don't buy it.|0|books|9
;
run;

/* Keep only the reviews of one category ('electronics'). */
data mycas.reviews_filtered;
   set mycas.reviews;
   where category = 'electronics';
run;

/* Text preprocessing with PROC TEXTMINE on the filtered reviews. */
proc textmine data=mycas.reviews_filtered;
   doc_id did;
   var text;
   parse
      nonoungroups
      notagging
      entities  = none
      outparent = mycas.reviews_bow_filtered
      outterms  = mycas.reviews_terms_filtered
      reducef   = 1;
run;

/* Rule extraction with PROC BOOLRULE on the filtered reviews. */
proc boolrule
   data        = mycas.reviews_bow_filtered
   docid       = _document_
   termid      = _termnum_
   docinfo     = mycas.reviews_filtered
   terminfo    = mycas.reviews_terms_filtered
   minsupports = 1
   mpos        = 1
   gpos        = 1;
   docinfo
      id      = did
      targets = (positive);
   terminfo
      id    = key
      label = term;
   output
      ruleterms = mycas.ruleterms_avance
      rules     = mycas.rules_avance;
run;

/* Copy the rules to WORK and print them. */
data rules_avance;
   set mycas.rules_avance;
run;

proc print data=rules_avance;
   title 'Exemple Avancé: Règles pour les avis électroniques';
   var target ruleid rule F1 precision recall;
run;

/* Cleanup.
   The tables are deleted through the same libref that created them.
   'mycas' is a libref, not a caslib name, so it cannot be passed to
   PROC CASUTIL's INCASLIB= option. */
proc delete
   data = mycas.reviews
          mycas.reviews_filtered
          mycas.reviews_bow_filtered
          mycas.reviews_terms_filtered
          mycas.ruleterms_avance
          mycas.rules_avance;
run;


/* --- BLOCK 4: explicit CAS session management --- */

/* Start a named CAS session with explicit connection and session options. */
options casport=5570 cashost='localhost';
cas my_new_session sessopts=(caslib=casuser timeout=1800 locale="en_US");

/* Bind the libref to the named session explicitly, so the tables below do not
   land in another session that may still be open. */
libname mycas cas sessref=my_new_session;

/* Build a larger review table (20 records) with the same layout as above. */
data mycas.reviews_large;
   infile datalines delimiter='|' missover;
   length text $300 category $20;
   input text$ positive category$ did;
   datalines;
This is the greatest phone ever! love it!|1|electronics|1
The phone's battery life is too short and screen resolution is low.|0|electronics|2
The screen resolution is low, but I love this tv.|1|electronics|3
The movie itself is great and I like it, although the resolution is low.|1|movies|4
The movie's story is boring and the acting is poor.|0|movies|5
I watched this movie on tv, it's not good on a small screen. |0|movies|6
watched the movie first and loved it, the book is even better!|1|books |7
I like the story in this book, they should put it on screen.|1|books|8
I love the author, but this book is a waste of time, don't buy it.|0|books|9
Fantastic product, highly recommend!|1|electronics|10
Battery drains too fast, very disappointed.|0|electronics|11
Excellent display and sound quality.|1|electronics|12
A captivating story, a must-watch!|1|movies|13
Terrible plot and acting, waste of time.|0|movies|14
Enjoyed reading every single page.|1|books|15
Not worth the hype, prefer other titles.|0|books|16
Best purchase this year!|1|electronics|17
Worst experience ever, totally unreliable.|0|electronics|18
Absolutely brilliant, a masterpiece.|1|movies|19
Couldn't put it down, truly amazing.|1|books|20
;
run;

/* Text preprocessing with PROC TEXTMINE. */
proc textmine data=mycas.reviews_large;
   doc_id did;
   var text;
   parse
      nonoungroups
      notagging
      entities  = none
      outparent = mycas.reviews_bow_large
      outterms  = mycas.reviews_terms_large
      reducef   = 1;
run;

/* Rule extraction with PROC BOOLRULE.
   - minsupports is a document COUNT, not a fraction: 2 documents is 10% of
     the 20 reviews loaded above.
   - mpos / gpos are not percentages: they are the m value and the minimum
     g-score applied to positive terms.
     NOTE(review): 0.5 is kept from the original example; confirm that
     fractional values are accepted, otherwise use 1 as in the other examples.
   - No rule-length or thread-count option is passed: neither is part of the
     PROC BOOLRULE statement syntax. */
proc boolrule
   data        = mycas.reviews_bow_large
   docid       = _document_
   termid      = _termnum_
   docinfo     = mycas.reviews_large
   terminfo    = mycas.reviews_terms_large
   minsupports = 2
   mpos        = 0.5
   gpos        = 0.5;
   docinfo
      id      = did
      targets = (positive);
   terminfo
      id    = key
      label = term;
   output
      ruleterms = mycas.ruleterms_cas
      rules     = mycas.rules_cas;
run;

/* Copy the rules to WORK and print them. */
data rules_cas;
   set mycas.rules_cas;
run;

proc print data=rules_cas;
   title 'Exemple Viya/CAS: Règles avec gestion de session et performances';
   var target ruleid rule F1 precision recall;
run;

/* Cleanup through the libref ('mycas' is a libref, not a caslib), then end
   the named CAS session. */
proc delete
   data = mycas.reviews_large
          mycas.reviews_bow_large
          mycas.reviews_terms_large
          mycas.ruleterms_cas
          mycas.rules_cas;
run;

cas my_new_session terminate;