diff --git a/website/package-lock.json b/website/package-lock.json index f6d95f21..c1aca5cc 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -141,6 +141,16 @@ "markdown-it": "bin/markdown-it.js" } }, + "node_modules/@babel/helper-string-parser": { + "version": "7.18.10", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.18.10.tgz", + "integrity": "sha512-XtIfWmeNY3i4t7t4D2t02q50HvqHybPqW2ki1kosnvWCwuCMeo81Jf0gwr85jy/neUdg5XDdeFE/80DXiO+njw==", + "dev": true, + "peer": true, + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/@babel/helper-validator-identifier": { "version": "7.18.6", "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.18.6.tgz", @@ -152,9 +162,9 @@ } }, "node_modules/@babel/parser": { - "version": "7.18.9", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.18.9.tgz", - "integrity": "sha512-9uJveS9eY9DJ0t64YbIBZICtJy8a5QrDEVdiLCG97fVLpDTpGX7t8mMSb6OWw6Lrnjqj4O8zwjELX3dhoMgiBg==", + "version": "7.18.10", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.18.10.tgz", + "integrity": "sha512-TYk3OA0HKL6qNryUayb5UUEhM/rkOQozIBEA5ITXh5DWrSp0TlUQXMyZmnWxG/DizSWBeeQ0Zbc5z8UGaaqoeg==", "dev": true, "peer": true, "bin": { @@ -165,12 +175,13 @@ } }, "node_modules/@babel/types": { - "version": "7.18.9", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.18.9.tgz", - "integrity": "sha512-WwMLAg2MvJmt/rKEVQBBhIVffMmnilX4oe0sRe7iPOHIGsqpruFHHdrfj4O1CMMtgMtCU4oPafZjDPCRgO57Wg==", + "version": "7.18.10", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.18.10.tgz", + "integrity": "sha512-MJvnbEiiNkpjo+LknnmRrqbY1GPUUggjv+wQVjetM/AONoupqRALB7I6jGqNUAZsKcRIEu2J6FRFvsczljjsaQ==", "dev": true, "peer": true, "dependencies": { + "@babel/helper-string-parser": "^7.18.10", "@babel/helper-validator-identifier": "^7.18.6", "to-fast-properties": "^2.0.0" }, @@ -303,9 +314,9 @@ "peer": 
true }, "node_modules/@types/node": { - "version": "18.6.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.6.2.tgz", - "integrity": "sha512-KcfkBq9H4PI6Vpu5B/KoPeuVDAbmi+2mDBqGPGUgoL7yXQtcWGu2vJWmmRkneWK3Rh0nIAX192Aa87AqKHYChQ==", + "version": "18.6.3", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.6.3.tgz", + "integrity": "sha512-6qKpDtoaYLM+5+AFChLhHermMQxc3TOEFIDzrZLPRGHPrLEwqFkkT5Kx3ju05g6X7uDPazz3jHbKPX0KzCjntg==", "dev": true, "peer": true }, @@ -2162,9 +2173,9 @@ "peer": true }, "node_modules/is-core-module": { - "version": "2.9.0", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.9.0.tgz", - "integrity": "sha512-+5FPy5PnwmO3lvfMb0AsoPaBG+5KHUI0wYFXOtYPnVVVspTFUuMZNfNaNVRt3FZadstu2c8x23vykRW/NBoU6A==", + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.10.0.tgz", + "integrity": "sha512-Erxj2n/LDAZ7H8WNJXd9tw38GYM3dv8rk8Zcs+jJuxYTW7sozH+SS8NtrSjVL1/vpLvWi1hxy96IzjJ3EHTJJg==", "dev": true, "peer": true, "dependencies": { @@ -4804,9 +4815,9 @@ } }, "node_modules/yargs-parser": { - "version": "21.0.1", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.0.1.tgz", - "integrity": "sha512-9BK1jFpLzJROCI5TzwZL/TU4gqjK5xiHV/RfWLOahrjAko/e4DJkRDZQXfvqAsiZzzYhgAzbgz6lg48jcm4GLg==", + "version": "21.1.0", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.0.tgz", + "integrity": "sha512-xzm2t63xTV/f7+bGMSRzLhUNk1ajv/tDoaD5OeGyC3cFo2fl7My9Z4hS3q2VdQ7JaLvTxErO8Jp5pRIFGMD/zg==", "dev": true, "peer": true, "engines": { @@ -4915,6 +4926,13 @@ "normalize-path": "^3.0.0" } }, + "@babel/helper-string-parser": { + "version": "7.18.10", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.18.10.tgz", + "integrity": "sha512-XtIfWmeNY3i4t7t4D2t02q50HvqHybPqW2ki1kosnvWCwuCMeo81Jf0gwr85jy/neUdg5XDdeFE/80DXiO+njw==", + "dev": true, + "peer": true + }, "@babel/helper-validator-identifier": { 
"version": "7.18.6", "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.18.6.tgz", @@ -4923,19 +4941,20 @@ "peer": true }, "@babel/parser": { - "version": "7.18.9", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.18.9.tgz", - "integrity": "sha512-9uJveS9eY9DJ0t64YbIBZICtJy8a5QrDEVdiLCG97fVLpDTpGX7t8mMSb6OWw6Lrnjqj4O8zwjELX3dhoMgiBg==", + "version": "7.18.10", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.18.10.tgz", + "integrity": "sha512-TYk3OA0HKL6qNryUayb5UUEhM/rkOQozIBEA5ITXh5DWrSp0TlUQXMyZmnWxG/DizSWBeeQ0Zbc5z8UGaaqoeg==", "dev": true, "peer": true }, "@babel/types": { - "version": "7.18.9", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.18.9.tgz", - "integrity": "sha512-WwMLAg2MvJmt/rKEVQBBhIVffMmnilX4oe0sRe7iPOHIGsqpruFHHdrfj4O1CMMtgMtCU4oPafZjDPCRgO57Wg==", + "version": "7.18.10", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.18.10.tgz", + "integrity": "sha512-MJvnbEiiNkpjo+LknnmRrqbY1GPUUggjv+wQVjetM/AONoupqRALB7I6jGqNUAZsKcRIEu2J6FRFvsczljjsaQ==", "dev": true, "peer": true, "requires": { + "@babel/helper-string-parser": "^7.18.10", "@babel/helper-validator-identifier": "^7.18.6", "to-fast-properties": "^2.0.0" } @@ -5043,9 +5062,9 @@ "peer": true }, "@types/node": { - "version": "18.6.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.6.2.tgz", - "integrity": "sha512-KcfkBq9H4PI6Vpu5B/KoPeuVDAbmi+2mDBqGPGUgoL7yXQtcWGu2vJWmmRkneWK3Rh0nIAX192Aa87AqKHYChQ==", + "version": "18.6.3", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.6.3.tgz", + "integrity": "sha512-6qKpDtoaYLM+5+AFChLhHermMQxc3TOEFIDzrZLPRGHPrLEwqFkkT5Kx3ju05g6X7uDPazz3jHbKPX0KzCjntg==", "dev": true, "peer": true }, @@ -6537,9 +6556,9 @@ "peer": true }, "is-core-module": { - "version": "2.9.0", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.9.0.tgz", - "integrity": 
"sha512-+5FPy5PnwmO3lvfMb0AsoPaBG+5KHUI0wYFXOtYPnVVVspTFUuMZNfNaNVRt3FZadstu2c8x23vykRW/NBoU6A==", + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.10.0.tgz", + "integrity": "sha512-Erxj2n/LDAZ7H8WNJXd9tw38GYM3dv8rk8Zcs+jJuxYTW7sozH+SS8NtrSjVL1/vpLvWi1hxy96IzjJ3EHTJJg==", "dev": true, "peer": true, "requires": { @@ -8633,9 +8652,9 @@ } }, "yargs-parser": { - "version": "21.0.1", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.0.1.tgz", - "integrity": "sha512-9BK1jFpLzJROCI5TzwZL/TU4gqjK5xiHV/RfWLOahrjAko/e4DJkRDZQXfvqAsiZzzYhgAzbgz6lg48jcm4GLg==", + "version": "21.1.0", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.0.tgz", + "integrity": "sha512-xzm2t63xTV/f7+bGMSRzLhUNk1ajv/tDoaD5OeGyC3cFo2fl7My9Z4hS3q2VdQ7JaLvTxErO8Jp5pRIFGMD/zg==", "dev": true, "peer": true } diff --git a/website/package.json b/website/package.json index c381b055..d735a438 100644 --- a/website/package.json +++ b/website/package.json @@ -1,7 +1,7 @@ { "name": "lit-website", "version": "1.0.0", - "description": "Code for the Languange Interpretability Tool website, which is hosted as a github page.", + "description": "Code for the Language Interpretability Tool website, which is hosted as a github page.", "main": "", "scripts": {}, "author": "People + AI Research", diff --git a/website/src/assets/images/text-salience-image-1.png b/website/src/assets/images/text-salience-image-1.png new file mode 100644 index 00000000..30f695a0 Binary files /dev/null and b/website/src/assets/images/text-salience-image-1.png differ diff --git a/website/src/assets/images/text-salience-image-10.png b/website/src/assets/images/text-salience-image-10.png new file mode 100644 index 00000000..041aa5a6 Binary files /dev/null and b/website/src/assets/images/text-salience-image-10.png differ diff --git a/website/src/assets/images/text-salience-image-11.png b/website/src/assets/images/text-salience-image-11.png new file 
mode 100644 index 00000000..5a112a09 Binary files /dev/null and b/website/src/assets/images/text-salience-image-11.png differ diff --git a/website/src/assets/images/text-salience-image-12.png b/website/src/assets/images/text-salience-image-12.png new file mode 100644 index 00000000..3c1471a0 Binary files /dev/null and b/website/src/assets/images/text-salience-image-12.png differ diff --git a/website/src/assets/images/text-salience-image-13.png b/website/src/assets/images/text-salience-image-13.png new file mode 100644 index 00000000..c703eda0 Binary files /dev/null and b/website/src/assets/images/text-salience-image-13.png differ diff --git a/website/src/assets/images/text-salience-image-14.png b/website/src/assets/images/text-salience-image-14.png new file mode 100644 index 00000000..0298bbfd Binary files /dev/null and b/website/src/assets/images/text-salience-image-14.png differ diff --git a/website/src/assets/images/text-salience-image-15.png b/website/src/assets/images/text-salience-image-15.png new file mode 100644 index 00000000..8157c9fc Binary files /dev/null and b/website/src/assets/images/text-salience-image-15.png differ diff --git a/website/src/assets/images/text-salience-image-16.png b/website/src/assets/images/text-salience-image-16.png new file mode 100644 index 00000000..f064bd70 Binary files /dev/null and b/website/src/assets/images/text-salience-image-16.png differ diff --git a/website/src/assets/images/text-salience-image-2.png b/website/src/assets/images/text-salience-image-2.png new file mode 100644 index 00000000..6e0bd216 Binary files /dev/null and b/website/src/assets/images/text-salience-image-2.png differ diff --git a/website/src/assets/images/text-salience-image-3.png b/website/src/assets/images/text-salience-image-3.png new file mode 100644 index 00000000..2fcb5edb Binary files /dev/null and b/website/src/assets/images/text-salience-image-3.png differ diff --git a/website/src/assets/images/text-salience-image-4.png 
b/website/src/assets/images/text-salience-image-4.png new file mode 100644 index 00000000..e8d93931 Binary files /dev/null and b/website/src/assets/images/text-salience-image-4.png differ diff --git a/website/src/assets/images/text-salience-image-5.png b/website/src/assets/images/text-salience-image-5.png new file mode 100644 index 00000000..e286d4b3 Binary files /dev/null and b/website/src/assets/images/text-salience-image-5.png differ diff --git a/website/src/assets/images/text-salience-image-6.png b/website/src/assets/images/text-salience-image-6.png new file mode 100644 index 00000000..2b24d681 Binary files /dev/null and b/website/src/assets/images/text-salience-image-6.png differ diff --git a/website/src/assets/images/text-salience-image-7.png b/website/src/assets/images/text-salience-image-7.png new file mode 100644 index 00000000..7a1d4c21 Binary files /dev/null and b/website/src/assets/images/text-salience-image-7.png differ diff --git a/website/src/assets/images/text-salience-image-8.png b/website/src/assets/images/text-salience-image-8.png new file mode 100644 index 00000000..3143e4ed Binary files /dev/null and b/website/src/assets/images/text-salience-image-8.png differ diff --git a/website/src/assets/images/text-salience-image-9.png b/website/src/assets/images/text-salience-image-9.png new file mode 100644 index 00000000..54a5e3f6 Binary files /dev/null and b/website/src/assets/images/text-salience-image-9.png differ diff --git a/website/src/tutorials.md b/website/src/tutorials.md index a1c9b713..c6bd8063 100644 --- a/website/src/tutorials.md +++ b/website/src/tutorials.md @@ -25,6 +25,9 @@ c-copy: "Get familiar with the interface of the Language Interpretability Tool." ## Conducting analysis in LIT +{% include partials/tutorial-link-element c-title: "Salience Maps for Text", link: "/tutorials/text-salience", +c-copy: "Learn how to use salience maps for text data in LIT." 
%} + {% include partials/tutorial-link-element c-title: "Tabular Feature Attribution", link: "/tutorials/tab-feat-attr", c-copy: "Learn how to use the Kernel SHAP based Tabular Feature Attribution module in LIT." %} diff --git a/website/src/tutorials/text-salience.md b/website/src/tutorials/text-salience.md new file mode 100644 index 00000000..02343734 --- /dev/null +++ b/website/src/tutorials/text-salience.md @@ -0,0 +1,337 @@ +--- +title: Salience Maps for Text +layout: layouts/tutorial.liquid + +hero-image: /assets/images/sample-banner.png +hero-title: "Salience Maps for Text" +hero-copy: "Learn how to use salience maps for text data in LIT." + +bc-anchor-category: "analysis" +bc-category-title: "Analysis" +bc-title: "Salience Maps for Text" + +time: "15 minutes" +takeaways: "Learn how to use salience maps for text data in LIT." +--- + +## Tutorial : Salience Maps for Text + +{% include partials/link-out link: "../../demos/glue.html", text: "Explore this demo yourself." %} + +Or, run your own with [`examples/glue_demo.py`](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/glue_demo.py) + +LIT enables users to analyze individual predictions for text input using +salience maps, for which gradient-based and/or blackbox methods are available. +In this tutorial, we will explore how to use salience maps to analyze a text +classifier in the [Classification and Regression models demo](https://pair-code.github.io/lit/demos/glue.html) +from the LIT website, and +how these findings can support counterfactual analysis using LIT’s generators, +such as Hotflip, to test hypotheses. 
The Salience Maps module can be found under +the Explanations tab in the bottom half of this demo and it supports four +different methods for the GLUE model under test (with other models it might +support a different number of these methods) - +[Grad L2 Norm](https://aclanthology.org/P18-1032/), +[Grad · Input](https://arxiv.org/abs/1412.6815), +[Integrated Gradients](https://arxiv.org/pdf/1703.01365.pdf) (IG) +and [LIME](https://arxiv.org/pdf/1602.04938v3.pdf). + +### Heuristics : Which salience method for which task? + +Salience methods are imperfect. Research has shown that salience methods are +often “[sensitive to factors that do not contribute to a model’s prediction](https://arxiv.org/abs/1711.00867)”; +that people tend to [overly trust salience values or use methods they believe they know incorrectly](https://dl.acm.org/doi/10.1145/3313831.3376219); +and that [model architecture may directly impact the utility](https://arxiv.org/pdf/2111.07367.pdf) +of different salience methods. + +With those limitations in mind, the question remains as to which methods should +be used and when. To offer some guidance, we have come up with the following +decision aid that provides some ideas about which salience method(s) might be +appropriate. + +{% include partials/inset-image image: '/assets/images/text-salience-image-1.png', + caption: 'TODO'%} + +If your model does not output gradients with its predictions (i.e., is a +blackbox), [LIME](https://arxiv.org/pdf/1602.04938v3.pdf) is your only choice as +it is currently the only black-box method LIT supports for text data. + +If your model does output gradients, then you can choose among three methods: +[Grad L2 Norm](https://aclanthology.org/P18-1032/), +[Grad · Input](https://arxiv.org/abs/1412.6815), and +[Integrated Gradients](https://arxiv.org/pdf/1703.01365.pdf) (IG). +Grad L2 Norm and Grad · Input are easy to use and fast to compute, but can suffer from gradient +saturation. 
IG addresses the gradient saturation issue in the Grad methods +(described in detail below), but requires that the model output both gradients +and embeddings, is much more expensive to compute, and requires parameterization +to optimize results. + +Remember that a good investigative process will check for commonalities and +patterns across salience values from multiple salience methods. Further, +salience methods should be an entry point for developing hypotheses about your +model’s behavior, and for identifying subsets of examples and/or creating +counterfactual examples that test those hypotheses. + +### Salience Maps for Text : Theoretical background and LIT overview + +All methods calculate salience, but there are subtle differences in their +approaches towards calculating a salience score for each token. Grad L2 Norm +only produces absolute salience scores while other methods like Grad · Input +(and also Integrated Gradients and LIME) produce signed values, leading to an +improved interpretation of whether a token has positive or negative influence on +the prediction. + +**_LIT uses different color scales to represent signed and unsigned salience scores_**. +Methods that produce unsigned salience values, such as Grad L2 Norm, +use a purple scale where darker colors indicate greater salience, whereas the other methods +use a red-to-green scale, with red denoting negative scores and green denoting +positive. + +{% include partials/info-box title: 'Interpreting salience polarity', + text: "Salience is always relative to the model’s prediction of one class. + Intuitively, a positive influence score (attribution) for a token (or word, + depending on your method) in an example means that if this token was removed + we expect a drop in model confidence in the prediction of the class. 
+ Similarly, removing a negative token would correspond to an increase in the + model's confidence in the prediction of this class."%} + +{% include partials/inset-image image: '/assets/images/text-salience-image-2.png', + caption: 'TODO'%} + +#### Token-Based Methods + +[Gradient saturation](https://towardsdatascience.com/the-vanishing-gradient-problem-69bf08b15484) +is a potential problem for all of the Gradient based methods, such as [Grad L2 Norm](https://aclanthology.org/P18-1032/) +and [Grad · Input](https://arxiv.org/abs/1412.6815), that we need to look out +for. Essentially if the model learning saturates for a particular token, then +its gradient goes to zero and appears to have zero salience. At the same time, +some tokens actually have a zero salience score, because they do not affect the +predictions. And there is no simple way to tell if a token that we are +interested in is legitimately irrelevant or if we are just observing the effects +of gradient saturation. + +The [integrated gradients](https://arxiv.org/pdf/1703.01365.pdf) method +addresses the gradient saturation problem by enriching gradients with +embeddings. +[Tokens](https://jalammar.github.io/a-visual-guide-to-using-bert-for-the-first-time/) +are the discrete building blocks of text sequences, but they can also be +represented as vectors in a [continuous embedding space](https://colah.github.io/posts/2014-07-NLP-RNNs-Representations/). +IG computes per-token salience as the average salience over a set of local +gradients computed by interpolating between the token’s embedding vectors and a +baseline (typically the zero vector). The tradeoff is that IG requires more +effort to identify the right number of interpolation steps to be +effective (configurable in LIT’s interface), with the number of steps +correlating directly with runtime. It also requires more information, +which the model may or may not be able to provide. 
+ +{% include partials/inset-image image: '/assets/images/text-salience-image-3.png', + caption: 'TODO'%} + +#### Blackbox Methods + +Some models do not provide tokens or token-level gradients, effectively making +them blackboxes. [LIME](https://arxiv.org/pdf/1602.04938v3.pdf) can be used with +these models. LIME works by generating a set of perturbed inputs, generally, by +dropping out or masking tokens, and training a local linear model to reconstruct +the original model's predictions. The weights of this linear model are treated +as the salience values. + +LIME has two limitations, compared to gradient-based methods: + +1. it can be slow as it requires many evaluations of the model, and +2. it can be noisy on longer inputs where there are more tokens to ablate. + +We can increase **_the number of samples to be used for LIME_** +within LIT to counter the potential noisiness, however +this is at the cost of computation time. + +In the example below, we go from the default number of samples (256) up to 4096, +increasing in powers of 2 (i.e., 256, 512, 1024, 2048, 4096). Note through these +transitions, how the model becomes more sure of the salience scores it outputs +(indicated by stronger colors for the key tokens). A particularly interesting +observation for this example is how the model isn’t sure about the word +“although” in this sentence, but later on learns that it has a negative +connotation and assigns a non-negligible negative salience score to it. 
+ +{% include partials/inset-image image: '/assets/images/text-salience-image-4.png', + caption: 'TODO'%} + +{% include partials/inset-image image: '/assets/images/text-salience-image-5.png', + caption: 'TODO'%} + +{% include partials/inset-image image: '/assets/images/text-salience-image-6.png', + caption: 'TODO'%} + +{% include partials/inset-image image: '/assets/images/text-salience-image-7.png', + caption: 'TODO'%} + +{% include partials/inset-image image: '/assets/images/text-salience-image-8.png', + caption: 'TODO'%} + +Another interesting difference between the gradient based methods and LIME lies +in how they analyze the input. The gradient based methods use the model’s +tokenizer, which splits up words into smaller constituents, whereas LIME +splits the text into words at whitespaces. +Thus, LIME’s word-level results are often incomparable with the token-level +results from other methods, as you can see in the salience maps below. + +{% include partials/inset-image image: '/assets/images/text-salience-image-9.png', + caption: 'TODO'%} + +### Single example use-case : What do we do with the output of the salience maps module + +Let’s take a concrete example and walk through how we might use the salience maps +module and counterfactual generators to analyze the behavior of the `sst2-tiny` +model on the classification task. + +First, let’s refer back to our heuristic for choosing appropriate methods. +Because `sst2-tiny` does not have an LSTM architecture, we shouldn’t rely too much +on Grad · Input. So, we are left with Grad L2 Norm, Integrated Gradients and +LIME to base our decisions on. + +To gain some confidence in our heuristic, we look for examples where Grad · +Input performs poorly compared to the other methods. There are quite a few in +the dataset, for example the sentence below where Grad · Input predicts +completely opposite salience scores to its counterparts. 
+ +{% include partials/inset-image image: '/assets/images/text-salience-image-10.png', + caption: 'TODO'%} + +#### Use Case 1: Sexism Analysis with Counterfactuals + +Coming back to our use-case, we want to investigate if the model displays sexist +behavior for a particular input sentence. We take a datapoint with a negative +sentiment label, which talks about the performance of an actress in the movie. + +The key words/tokens (based on salience scores across the three chosen methods) +in this sentence are “hampered”, “lifetime-channel”, “lead”, “actress”, “her” +and “depth”. The only words out of this which are related to gender are +“actress” and “her”. The words “actress” and “her” get a significant weight +for both Grad L2 Norm and IG, and are assigned a positive score (IG scores are +slightly stronger than Grad L2 Norm scores), indicating that the gender of the +person is helping the model be sure of its predictions of this sentence being a +negative review sentiment. However for LIME, the salience scores for these two +words are small negative numbers, indicating that the gender of the person is +actually causing a small decrease in model confidence for the prediction of this +being a negative review. Even with this small disparity between the token-based +and blackbox methods in the gender related words in the sentence, it turns out +that these are not the most important words. “Hampered”, “lifetime-channel” and +“plot” are the dominating words/tokens for this particular example in helping +the model make its decision. We still want to explore if reversing the gender +might change this. Would it make the model give more or less importance to other +tokens or the tokens we replaced? Would it change the model prediction +confidence scores? 
+ +To do this, we generate a counterfactual example using the Datapoint +Editor which is located right beside the Data Table in the UI, +changing "actress" with "actor" and "her" with "his" after selecting our +datapoint of interest. An alternative to this approach is to use the Word +replacer under the Counterfactuals tab in the bottom half of the LIT app to +achieve the same task. If our model is predicting a negative sentiment +due to sexist influences towards +“actress” or “her”, then the hypothesis is that it should show opposite +sentiments if we flip those key tokens. + +{% include partials/inset-image image: '/assets/images/text-salience-image-11.png', + caption: 'TODO'%} + +However, it turns out that there is very minimal (and hence negligible) change +in the salience score values of any of the tokens. The model doesn’t change its +prediction either. It still predicts this to be a negative review sentiment with +approximately the same prediction confidence. This indicates that at least for +this particular example, our model isn’t displaying sexist behavior and is +actually making its prediction based on key tokens in the sentence which are not +related to the gender of the actress/actor. + +#### Use Case 2: Pairwise Comparisons + +Let’s take another example. This time we consider the sentence “a sometimes +tedious film” and generate three counterfactuals, first by replacing the two +words “sometimes” and “tedious” with their respective antonyms one-by-one and +then together to observe the changes in predictions and salience. + +To create the counterfactuals, we can simply use the Datapoint Editor which is +located right beside the Data Table in the UI. We can just select our data point +of interest (data point 6), and then replace the words we are interested in with +the respective substitutes. Then we assign a `label` to the newly created sentence +and add it to our data. 
For this particular example, we are assigning 0 when +"tedious" appears and 1 when "exciting" appears in the sentence. +An alternative to this approach is to use the Word +replacer under the Counterfactuals tab in the bottom half of the LIT app to +achieve the same task. + +{% include partials/inset-image image: '/assets/images/text-salience-image-12.png', + caption: 'TODO'%} + +We can pin the original sentence in the data table and then cycle through the +three available pairs by selecting each of the new sentences as our primary +selection. This will give us a comparison-type output in the Salience Maps +module between the pinned and the selected examples. + +{% include partials/inset-image image: '/assets/images/text-salience-image-13.png', + caption: 'TODO'%} + +When we replace “sometimes” with “often”, it gets a negative score of almost +equal magnitude (reversing polarity) from LIME which makes sense, because +“often” makes the next word in the sentence more impactful, linguistically. The model +prediction doesn’t change either, and this new review is still classified as +having a negative sentiment. + +{% include partials/inset-image image: '/assets/images/text-salience-image-14.png', + caption: 'TODO'%} + +On replacing “tedious” with “exciting”, the salience for “sometimes” changes +from positive score to negative in the LIME output. In the IG output, +“sometimes” changes from a strong positive score to a weak positive score. These +changes are also justified because in this new sentence “sometimes” counters the +positive effect of the word “exciting”. The main negative word in our original +datapoint was “tedious” and by replacing this with a positive word “exciting”, +the model’s classification of this new sentence also changes and the new +sentence is classified as positive with a very high confidence score. 
+ +{% include partials/inset-image image: '/assets/images/text-salience-image-15.png', + caption: 'TODO'%} + +And finally, when we replace both “sometimes tedious” with “often exciting”, we +get strong positive scores from both LIME and IG, which is in line with the +overall strong positive sentiment of the sentence. The model predicts this new +sentence as positive sentiment, and the confidence score for this prediction is +slightly higher than the previous sentence where instead of “often” we had used +“sometimes”. This makes sense as well because “often” enhances the positive +sentiment slightly more than using “sometimes” in a positive review. + +{% include partials/inset-image image: '/assets/images/text-salience-image-16.png', + caption: 'TODO'%} + +In this second example, we mostly based our observation on LIME and IG, because +we could observe visual changes directly from the outputs of these methods. Grad +L2 Norm outputs were comparatively inconclusive, highlighting the need to +select appropriate methods and compare results between them. The model +predictions were in +line with our expected class labels and the confidence scores for predictions on +the counterfactuals could be justified using salience scores assigned to the new +tokens. + +#### Use Case 3: Quality Assurance + +A real-life use case for the salience maps module can be in Quality Assurance. +For example, if there is a failure in production (e.g., wrong results for a search +query), we know the text input and the label the model predicted. We can use LIT +Salience Maps to debug this failure and figure out which tokens were most +influential in the prediction of the wrong label, and which alternative labels +could have been predicted (i.e., is there one clear winner, or are there a few +that are roughly the same?). Once we are done with debugging using LIT, we can +make the necessary changes to the model or training data (e.g., adding fail-safes +or checks) to solve the production failure. 
+ +### Conclusion + +Three gradient-based salience methods and one blackbox method are provided out +of the box to LIT users who need to use these post-hoc interpretations to make +sense of their language model’s predictions. This diverse array of built-in +techniques can be used in combination with other LIT modules like +counterfactuals to support robust exploration of a model's behavior, as +illustrated in this tutorial. And as always, LIT strives to +enable users to +[add their own salience interpreters](https://github.com/PAIR-code/lit/wiki/api.md#interpretation-components) +to allow for a wider variety +of use cases beyond these default capabilities!