Difference between revisions of "Free Software Directory:Participate/Script aid"
(Fix wrong minus signal placement and remove extra parenthesis.) |
(Comment each part of the expressions, rework introduction, describe dependencies) |
||
Line 3: | Line 3: | ||
This script aims to help contributors on finding things that might be needed to consider during evaluation of the project/software. | This script aims to help contributors on finding things that might be needed to consider during evaluation of the project/software. | ||
− | + | Besides searching for common licensing terms, it lists all the MIME/media types of files in the current directory, so that one can find cases of files without complete and corresponding source and also find JavaScript files to look or insert GNU LibreJS syntax. | |
− | + | To tackle cases of files which are partly JavaScript, it also scans for words related to JS and event handlers. | |
− | The output is in CSV format | + | As an extra precaution, an extra pass for other possible problematic words is made. |
+ | |||
+ | There is also a field for notes which is always left empty for the evaluator to do the appropriate observations or even insert marks of continuation for future resume of the review. | ||
+ | |||
+ | The output is in CSV format, making it suitable for parsing by other software, such as LibreOffice Calc, GNU R and even GNU Awk. For the last case, before doing so, set <code>FPAT</code> to <code>([^,]*)|(\"[^\"]+\")</code> and <code>RS</code> to <code>\r\n</code>, This variable setting was based on [https://www.gnu.org/software/gawk/manual/html_node/Splitting-By-Content.html the related section in the GNU Awk User's Guide] and on Awk's Texinfo/Info page. | ||
+ | |||
+ | In all cases, when importing, make sure that the selected language is English, so that "TRUE" and "FALSE" can be translated as correct boolean representation in your language of choice. Also, be aware of false-positives. | ||
You're welcome to contribute to this script and add your name and contact information to the copyright notice of the script. | You're welcome to contribute to this script and add your name and contact information to the copyright notice of the script. | ||
Line 24: | Line 30: | ||
Now leave the script to do its work and wait for the sound clue to continue working on the evaluation. | Now leave the script to do its work and wait for the sound clue to continue working on the evaluation. | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
== Script == | == Script == | ||
<pre style="white-space: pre-wrap"> | <pre style="white-space: pre-wrap"> | ||
− | #!/bin/ | + | #!/bin/bash |
# FSD Script Aid.sh: Helps user evaluate entries for the Free Software Directory. | # FSD Script Aid.sh: Helps user evaluate entries for the Free Software Directory. | ||
# Copyright (C) 2016, 2018, 2020, 2021 Adonay "adfeno" Felipe Nogueira <https://libreplanet.org/wiki/User:Adfeno> <adfeno@hyperbola.info> | # Copyright (C) 2016, 2018, 2020, 2021 Adonay "adfeno" Felipe Nogueira <https://libreplanet.org/wiki/User:Adfeno> <adfeno@hyperbola.info> | ||
− | # This program is free software: you can redistribute it and/or modify | + | # This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. |
− | + | ||
− | + | # This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. | |
− | |||
− | # | + | # You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | printf '" | + | # For more information on the purpose and caveats of this script, see <https://directory.fsf.org/wiki/Free_Software_Directory:Participate/Script_aid>. |
+ | |||
+ | |||
+ | # # Dependencies | ||
+ | |||
+ | |||
+ | # * GNU bash; | ||
+ | # * any implementation of the following POSIX utilities: | ||
+ | # * file, whose -i option prints MIME type and character set; | ||
+ | # * find; | ||
+ | # * grep; | ||
+ | # * printf; | ||
+ | # * sed; | ||
+ | # * tr. | ||
+ | |||
+ | |||
+ | # # Extended regular expressions (ERE) to match possible licensing issues | ||
+ | |||
+ | |||
+ | # Copyright symbol | ||
+ | licensing_ere="©" | ||
+ | |||
+ | # Use GNU bash's += assignment to append to an existing variable. | ||
+ | # Agreement | ||
+ | licensing_ere+="|agreement|a(co|cue)rdo" | ||
+ | # Allowed | ||
+ | licensing_ere+="|allowed|permitid[ao]" | ||
+ | # As is | ||
+ | licensing_ere+="|as[[:space:]-]*is[^[:alnum:]]" | ||
+ | # Condition | ||
+ | licensing_ere+="|condi(tions?|ci(ón|ones)|ç(ão|ões))" | ||
+ | # Copyright, copyleft, copyfarleft, copyfair, copymi | ||
+ | licensing_ere+="|copy(right|(far)?left|fai?r|m[ei])" | ||
+ | # EULA, exclusive | ||
+ | licensing_ere+="|eula|exclusiv[aeo]" | ||
+ | # Forbid | ||
+ | licensing_ere+="|forbid(s|den)?|pro(hibited|h?ibid[ao])" | ||
+ | # License abbreviations | ||
+ | licensing_ere+="|[al]?gpl|fdl" | ||
+ | # Law | ||
+ | licensing_ere+="|l(aw|e[iy])" | ||
+ | # Liable | ||
+ | licensing_ere+="|liab(le|ilit(y|ies))" | ||
+ | # Responsible | ||
+ | licensing_ere+="|respons(ib(le|ilit(y|ies))|ab(le|ilidad(es?)?)|áve(l|is))" | ||
+ | # License | ||
+ | licensing_ere+="|licen([cs]e|(ç|ci)a)" | ||
+ | # Notice | ||
+ | licensing_ere+="|not(ice|[ií]cia|ifica(tion|ção|ción))" | ||
+ | # Patent, right | ||
+ | licensing_ere+="|patente?|right|d(erech|ireit)o|droit" | ||
+ | # Terms | ||
+ | licensing_ere+="|t(erms?|érminos?|ermos?)" | ||
+ | # Trade | ||
+ | licensing_ere+="|trade[[:space:]]+(mark|secret)" | ||
+ | # Transfer, warrant | ||
+ | licensing_ere+="|transfer|gu?arant|warrant" | ||
+ | |||
+ | |||
+ | # # Extended regular expression (ERE) to match JavaScript issues | ||
+ | |||
+ | |||
+ | # Start of boundary | ||
+ | javascript_ere="(^|[^[:alnum:]]+)" | ||
+ | javascript_ere+="(" | ||
+ | |||
+ | # Script tag or addEventListener() function | ||
+ | javascript_ere+="script|addeventlistener" | ||
+ | |||
+ | # Start of after, before, on variations | ||
+ | javascript_ere+="|(after|before|on)" | ||
+ | javascript_ere+="(" | ||
+ | |||
+ | # Abort, autocomplete, blur, cancel, canplay | ||
+ | javascript_ere+="abort|autocomplete(error)?|blur|cancel|canplay(through)?" | ||
+ | # ( Cue, duration, hash, language, rate, readystate, volume ) … change | ||
+ | javascript_ere+="|(cue|duration|hash|language|rate|readystate|volume)?change" | ||
+ | # Click, close, contextmenu | ||
+ | javascript_ere+="|(db)?click|close|contextmenu" | ||
+ | # Drag and drop | ||
+ | javascript_ere+="|drag(end|enter|exit|leave|over|start)|drop" | ||
+ | # Emptied, ended, error, focus, input, invalid | ||
+ | javascript_ere+="|emptied|ended|error|focus|input|invalid" | ||
+ | # Key presses | ||
+ | javascript_ere+="|key(down|press|up)" | ||
+ | # Load | ||
+ | javascript_ere+="|(un)?load(ed(meta)?data)?" | ||
+ | # Start, message | ||
+ | javascript_ere+="|start|message" | ||
+ | # Mouse | ||
+ | javascript_ere+="|mouse(down|enter|leave|move|out|over|up|wheel)" | ||
+ | # Connectivity and page | ||
+ | javascript_ere+="|(off|on)line|page(hide|show)" | ||
+ | # Play, popstate, print, progress | ||
+ | javascript_ere+="|pause|play(ing)?|popstate|print|progress" | ||
+ | # Reset, resize, scroll, seek | ||
+ | javascript_ere+="|reset|resize|scroll|seek(ed|ing)" | ||
+ | # Select, show, sort, stalled, storage, submit | ||
+ | javascript_ere+="|select|show|sort|stalled|storage|submit" | ||
+ | # Suspend, timeupdate, toggle, waiting | ||
+ | javascript_ere+="|suspend|timeupdate|toggle|waiting" | ||
+ | |||
+ | # End of after, before, on variations | ||
+ | javascript_ere+=")" | ||
+ | |||
+ | # End of boundary | ||
+ | javascript_ere+=")" | ||
+ | javascript_ere+="([^[:alnum:]]+|$)" | ||
+ | |||
+ | |||
+ | # # Extended regular expression (ERE) to match possible other issues | ||
+ | |||
+ | |||
+ | # Google | ||
+ | other_ere+="gcm|google([^[:alnum:]]*cloud[^[:alnum:]]*messaging)?|youtube|yt" | ||
+ | # Microsoft, Facebook, WhatsApp, Telegram | ||
+ | other_ere+="|microsoft|facebook|whats[^[:alnum:]]*app|telegram" | ||
+ | # CDNs, Amazon, CloudFlare | ||
+ | other_ere+="|amazon|aws|cloud[^[:alnum:]]*flare|cdn" | ||
+ | # Apple, Uber, AirBnB | ||
+ | other_ere+="|apple|uber|air[^[:alnum:]]*bnb" | ||
+ | # System distributions | ||
+ | other_ere+="|android|lineage|cyanogen" | ||
+ | # DRM, Chrome and derivated | ||
+ | other_ere+="|drm|chrom(e|ium)|electron" | ||
+ | |||
+ | |||
+ | separate_mime_from_charset() { | ||
+ | # * Arguments | ||
+ | # * $1: file path to inspect MIME type and character set. | ||
+ | # * Standard output | ||
+ | # * Comma-separated pair consisting of MIME type and character set. | ||
+ | |||
+ | # Make use of -i to get MIME type and character set. | ||
+ | # sed does the split and tr deletes line feeds/new lines. | ||
+ | file -hi "$1" \ | ||
+ | | sed \ | ||
+ | '{ | ||
+ | s/^\.\//"/g | ||
+ | s/: /",/g | ||
+ | s/; charset=/,/g | ||
+ | }' \ | ||
+ | | tr -d "\n" | ||
+ | } | ||
+ | |||
+ | was_ere_found() { | ||
+ | # * Arguments | ||
+ | # * $1: extended regular expression to be searched; | ||
+ | # * $2: file path to look for given extended regular expression. | ||
+ | # * Standard output | ||
+ | # * If a matche was found | ||
+ | # * String "TRUE" | ||
+ | # * Else | ||
+ | # * String "FALSE" | ||
+ | |||
+ | grep -Eiq "$1" "$2" | ||
+ | [ $? -eq 0 ] && printf "TRUE" || printf "FALSE" | ||
+ | } | ||
+ | |||
+ | # Use GNU bash's -f option to export functions for use with find. | ||
+ | export -f separate_mime_from_charset | ||
+ | export -f was_ere_found | ||
+ | |||
+ | printf '"Path","MIME","Charset","Licensing","JavaScript","Other","Notes"\r\n' | ||
find "." \ | find "." \ | ||
\( \ | \( \ | ||
-type d \ | -type d \ | ||
− | \( -name '.cvs' - | + | \( -name '.cvs' -o -name '.hg' -o -name '.git' -o -name '.svn' \) \ |
− | \) -prune - | + | \) -prune -o \ |
! -type d \ | ! -type d \ | ||
− | -exec | + | -exec bash -c \ |
− | + | 'separate_mime_from_charset "{}"' \; \ | |
− | + | -exec printf ',' \; \ | |
− | + | -exec bash -c \ | |
− | + | 'was_ere_found "'"$licensing_ere"'" "{}"' \; \ | |
− | + | -exec printf ',' \; \ | |
− | -printf ',' \ | + | -exec bash -c \ |
− | -exec | + | 'was_ere_found "'"$javascript_ere"'" "{}"' \; \ |
− | ' | + | -exec printf ',' \; \ |
− | + | -exec bash -c \ | |
− | + | 'was_ere_found "'"$other_ere"'" "{}"' \; \ | |
− | + | -exec printf ',' \; \ | |
− | + | -exec printf '\r\n' \; | |
− | |||
− | |||
− | |||
− | |||
− | -printf ',' \ | ||
− | -exec | ||
− | ' | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | -printf ',' \ | ||
− | -exec | ||
− | ' | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | -printf ',' \ | ||
− | -printf '\r\n' | ||
</pre> | </pre> |
Revision as of 16:50, 14 February 2021
Purpose
This script aims to help contributors find things that might need to be considered during the evaluation of the project/software.
Besides searching for common licensing terms, it lists the MIME/media types of all files in the current directory (searched recursively), so that one can find files that lack complete and corresponding source, and also find JavaScript files in which to look for or insert GNU LibreJS syntax.
To tackle cases of files which are partly JavaScript, it also scans for words related to JS and event handlers.
As an extra precaution, an additional pass is made for other possibly problematic words.
There is also a field for notes, which is always left empty so that the evaluator can record the appropriate observations or even insert continuation marks for resuming the review later.
The output is in CSV format, making it suitable for parsing by other software, such as LibreOffice Calc, GNU R and even GNU Awk. For the last case, before doing so, set FPAT
to ([^,]*)|(\"[^\"]+\")
and RS
to \r\n
. These variable settings were based on the related section in the GNU Awk User's Guide and on Awk's Texinfo/Info page.
In all cases, when importing, make sure that the selected language is English, so that "TRUE" and "FALSE" are converted to the correct boolean representation in your language of choice. Also, be aware of false positives.
You're welcome to contribute to this script and add your name and contact information to the copyright notice of the script.
Usage
Best if you take the complete corresponding source of the project being evaluated (e.g.: when using git clone
, you can accomplish this using the --recursive
option).
git clone --recursive [Some git repository.] cd [Directory created by git] [Script aid.] > [Desired text file to store output.]; printf '\a'
printf '\a'
can be replaced by a command to play an audio file of your choice.
Now leave the script to do its work and wait for the sound cue to continue working on the evaluation.
Script
#!/bin/bash
# FSD Script Aid.sh: Helps user evaluate entries for the Free Software Directory.
# Copyright (C) 2016, 2018, 2020, 2021 Adonay "adfeno" Felipe Nogueira <https://libreplanet.org/wiki/User:Adfeno> <adfeno@hyperbola.info>

# This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.

# For more information on the purpose and caveats of this script, see <https://directory.fsf.org/wiki/Free_Software_Directory:Participate/Script_aid>.


# # Usage
#
# Run from the root of the source tree to be evaluated. One CSV row
# (CRLF-terminated) is written to standard output for every non-directory
# found below the current directory, skipping version-control metadata
# directories. Columns:
#   "Path","MIME","Charset","Licensing","JavaScript","Other","Notes"
# "Notes" is always left empty for the evaluator.


# # Dependencies
#
# * GNU bash;
# * the following utilities:
#   * file, whose -i option prints MIME type and character set and whose
#     -b option omits the file name from the output;
#   * find (with the POSIX "-exec ... {} +" form);
#   * grep;
#   * printf.


# # Extended regular expressions (ERE) to match possible licensing issues

# Copyright symbol
licensing_ere="©"

# Use GNU bash's += assignment to append to an existing variable.
# Agreement
licensing_ere+="|agreement|a(co|cue)rdo"
# Allowed
licensing_ere+="|allowed|permitid[ao]"
# As is
licensing_ere+="|as[[:space:]-]*is[^[:alnum:]]"
# Condition
licensing_ere+="|condi(tions?|ci(ón|ones)|ç(ão|ões))"
# Copyright, copyleft, copyfarleft, copyfair, copymi
licensing_ere+="|copy(right|(far)?left|fai?r|m[ei])"
# EULA, exclusive
licensing_ere+="|eula|exclusiv[aeo]"
# Forbid
licensing_ere+="|forbid(s|den)?|pro(hibited|h?ibid[ao])"
# License abbreviations
licensing_ere+="|[al]?gpl|fdl"
# Law
licensing_ere+="|l(aw|e[iy])"
# Liable
licensing_ere+="|liab(le|ilit(y|ies))"
# Responsible
licensing_ere+="|respons(ib(le|ilit(y|ies))|ab(le|ilidad(es?)?)|áve(l|is))"
# License
licensing_ere+="|licen([cs]e|(ç|ci)a)"
# Notice
licensing_ere+="|not(ice|[ií]cia|ifica(tion|ção|ción))"
# Patent, right
licensing_ere+="|patente?|right|d(erech|ireit)o|droit"
# Terms
licensing_ere+="|t(erms?|érminos?|ermos?)"
# Trade
licensing_ere+="|trade[[:space:]]+(mark|secret)"
# Transfer, warrant
licensing_ere+="|transfer|gu?arant|warrant"


# # Extended regular expression (ERE) to match JavaScript issues

# Start of boundary
javascript_ere="(^|[^[:alnum:]]+)"
javascript_ere+="("

# Script tag or addEventListener() function
javascript_ere+="script|addeventlistener"

# Start of after, before, on variations
javascript_ere+="|(after|before|on)"
javascript_ere+="("

# Abort, autocomplete, blur, cancel, canplay
javascript_ere+="abort|autocomplete(error)?|blur|cancel|canplay(through)?"
# ( Cue, duration, hash, language, rate, readystate, volume ) … change
javascript_ere+="|(cue|duration|hash|language|rate|readystate|volume)?change"
# Click, close, contextmenu
javascript_ere+="|(db)?click|close|contextmenu"
# Drag and drop
javascript_ere+="|drag(end|enter|exit|leave|over|start)|drop"
# Emptied, ended, error, focus, input, invalid
javascript_ere+="|emptied|ended|error|focus|input|invalid"
# Key presses
javascript_ere+="|key(down|press|up)"
# Load
javascript_ere+="|(un)?load(ed(meta)?data)?"
# Start, message
javascript_ere+="|start|message"
# Mouse
javascript_ere+="|mouse(down|enter|leave|move|out|over|up|wheel)"
# Connectivity and page
javascript_ere+="|(off|on)line|page(hide|show)"
# Play, popstate, print, progress
javascript_ere+="|pause|play(ing)?|popstate|print|progress"
# Reset, resize, scroll, seek
javascript_ere+="|reset|resize|scroll|seek(ed|ing)"
# Select, show, sort, stalled, storage, submit
javascript_ere+="|select|show|sort|stalled|storage|submit"
# Suspend, timeupdate, toggle, waiting
javascript_ere+="|suspend|timeupdate|toggle|waiting"

# End of after, before, on variations
javascript_ere+=")"

# End of boundary
javascript_ere+=")"
# Single quotes keep the end-of-line anchor "$" literal.
javascript_ere+='([^[:alnum:]]+|$)'


# # Extended regular expression (ERE) to match possible other issues

# Google
# Plain "=" here: the first assignment must not append to a value that
# happens to be inherited from the environment.
other_ere="gcm|google([^[:alnum:]]*cloud[^[:alnum:]]*messaging)?|youtube|yt"
# Microsoft, Facebook, WhatsApp, Telegram
other_ere+="|microsoft|facebook|whats[^[:alnum:]]*app|telegram"
# CDNs, Amazon, CloudFlare
other_ere+="|amazon|aws|cloud[^[:alnum:]]*flare|cdn"
# Apple, Uber, AirBnB
other_ere+="|apple|uber|air[^[:alnum:]]*bnb"
# System distributions
other_ere+="|android|lineage|cyanogen"
# DRM, Chrome and derivatives
other_ere+="|drm|chrom(e|ium)|electron"


separate_mime_from_charset() {
  # * Arguments
  #   * $1: file path to inspect MIME type and character set.
  # * Standard output
  #   * The quoted path (leading "./" removed, embedded double quotes
  #     doubled as CSV requires), the MIME type and the character set,
  #     comma-separated and without a trailing newline.

  local path=$1
  local description mime_type
  local charset=""
  local dq='"'

  # -h: do not follow symbolic links; -b: omit the file name, so that a
  # path containing ": " or quotes cannot confuse the parsing below;
  # -i: print "type/subtype; charset=name".
  description=$(file -hbi -- "$path")

  mime_type=${description%%; charset=*}
  if [[ "$description" == *"; charset="* ]]; then
    charset=${description#*; charset=}
  fi

  path=${path#./}
  path=${path//$dq/$dq$dq}

  printf '"%s",%s,%s' "$path" "$mime_type" "$charset"
}

was_ere_found() {
  # * Arguments
  #   * $1: extended regular expression to be searched (case-insensitive);
  #   * $2: file path to look for given extended regular expression.
  # * Standard output
  #   * If a match was found
  #     * String "TRUE"
  #   * Else (including when grep fails to read the file)
  #     * String "FALSE"

  # -e and -- keep a pattern or path starting with "-" from being taken
  # as an option.
  if grep -Eiq -e "$1" -- "$2"; then
    printf 'TRUE'
  else
    printf 'FALSE'
  fi
}

print_csv_row() {
  # * Arguments
  #   * $1: file path to report on.
  # * Environment
  #   * licensing_ere, javascript_ere, other_ere: expressions to search.
  # * Standard output
  #   * One complete CSV row, terminated by CRLF.

  separate_mime_from_charset "$1"
  printf ','
  was_ere_found "$licensing_ere" "$1"
  printf ','
  was_ere_found "$javascript_ere" "$1"
  printf ','
  was_ere_found "$other_ere" "$1"
  # The trailing comma opens the always-empty "Notes" field.
  printf ',\r\n'
}

# Use GNU bash's -f option to export functions for use with find, and
# export the expressions so the child shell reads them from the
# environment instead of having them spliced into its source text.
export -f separate_mime_from_charset
export -f was_ere_found
export -f print_csv_row
export licensing_ere javascript_ere other_ere

printf '"Path","MIME","Charset","Licensing","JavaScript","Other","Notes"\r\n'

# File names are handed to the child shell as positional parameters
# ("{} +"), never embedded in the -c string: a name such as '$(cmd)' or
# one containing '"' would otherwise be executed as shell code.
find "." \
  \( \
    -type d \
    \( -name '.cvs' -o -name '.hg' -o -name '.git' -o -name '.svn' \) \
  \) -prune -o \
  ! -type d \
  -exec bash -c 'for path in "$@"; do print_csv_row "$path"; done' _ {} +
Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is included in the page “GNU Free Documentation License”.
The copyright and license notices on this page only apply to the text on this page. Any software or copyright-licenses or other similar notices described in this text has its own copyright notice and license, which can usually be found in the distribution or license text itself.