1 ! To remove the DOS carriage returns from Windows machines:
2 ! perl -pi -e 's/\r\n|\n|\r/\n/g' file-to-convert # Convert to UNIX
3 ! This program is not required for that.
4 ! For completeness, from stackoverflow:
5 ! perl -pi -e 's/\r\n|\n|\r/\r\n/g' file-to-convert # Convert to DOS
7 ! The purpose of this program is to scan through all of the
8 ! lines of a Fortran file to detect if there are any character
9 ! codes (excluding the ubiquitously used tab character) outside
10 ! the range of 32 to 127 (the standard printable ASCII character set).
13 ! To easily see the offending characters:
14 ! vim -c "e ++enc=latin1" file
16 ! The "Fortran 2003 Handbook" (Adams et al. 2009), in section 3.1.1,
17 ! lists the standard Fortran character set, which is consistent
18 ! with the restriction of the ASCII character codes from 32
19 ! through 127 (inclusive).
21 ! So far during source code testing, all of the special characters
22 ! outside of the ASCII printable character range have been used
23 ! within comments. The WRF model strips comments before passing
24 ! the code through to the compiler. These characters can
25 ! occasionally get introduced with physics routines when variable
26 ! names or units use special characters (superscripts, squared
27 ! terms, hats, etc), or when short- or long-dashes are used in
28 ! citing references. Again, these sources are typical of
29 ! information that could be provided in a comment without
30 ! impacting the code to be built.
32 ! How to build the finder program:
33 ! gfortran -ffree-form non_ascii_finder.F
36 ! ./a.out -v|-V file.F
38 ! Typically, the program is run twice.
40 ! 1) The program is run the first time with the verbose flag
41 ! (mandatory) set to "-v". Output only occurs when the input file
42 ! contains at least one offending character. In this case, the
43 ! output is the file name containing the offending character.
45 ! 2) Once the list of problematic files is assembled, the program
46 ! is re-run with verbose flag set to "-V". For each file processed,
47 ! the program outputs the line of source code that contains the
48 ! offending character(s), and also outputs the line number (to help
49 ! the user find the string).
51 ! Here is an example of the two step usage before all of the offending
52 ! characters were removed. From the top WRF directory:
54 ! <type on the command line>
55 ! find . -name \*.F -exec tools/a.out -v {} \;
58 ! ./chem/module_cam_mam_newnuc.F
59 ! ./chem/module_gocart_dmsemis.F
60 ! ./chem/module_gocart_seasalt.F
61 ! ./chem/module_mozcart_wetscav.F
62 ! ./chem/module_sea_salt_emis.F
63 ! ./dyn_em/module_sfs_driver.F
64 ! ./dyn_em/module_sfs_nba.F
65 ! ./frame/module_cpl.F
66 ! ./hydro/Routing/module_gw_gw2d.F
67 ! ./phys/module_bl_mfshconvpbl.F
68 ! ./phys/module_gocart_seasalt.F
69 ! ./phys/module_ltng_cpmpr92z.F
70 ! ./phys/module_ltng_crmpr92.F
71 ! ./phys/module_ltng_iccg.F
72 ! ./phys/module_mp_nssl_2mom.F
73 ! ./phys/module_mp_wdm6.F
74 ! ./phys/module_sf_bem.F
75 ! ./phys/module_sf_bep.F
76 ! ./phys/module_sf_bep_bem.F
77 ! ./tools/non_ascii_finder.F
78 ! ./var/convertor/wave2grid_kma/pvchkdv.F
80 ! The manufactured list of files (shown above) can be processed
81 ! individually, now with the "-V" flag:
83 ! <type on the command line>
84 ! tools/a.out -V ./tools/non_ascii_finder.F
87 ! ./non_ascii_finder.F
88 ! Found something on line # 25
89 ! ! --> this line has a problem with the superscript numeral 2: [W/m^2]
90 ! Character # 69 is a ?, which is character code 194
91 ! ./non_ascii_finder.F
92 ! Found something on line # 25
93 ! ! --> this line has a problem with the superscript numeral 2: [W/m^2]
94 ! Character # 70 is a ?, which is character code 178
95 ! Troubles, with 2 lines.
96 ! File uses character codes outside the standard ASCII range of 32 to 127
98 ! As a test, running the executable on this file will locate that
99 ! --> this line has a problem with the superscript numeral 2: [W/m²]
101 PROGRAM non_ascii_finder
! Purpose: scan the file named by the second command-line argument, one
! record at a time, and report every character whose code is below
! FIRST_VALID or above LAST_VALID, other than TAB. The first argument
! selects how much is printed.
! NOTE(review): this listing does not show several statements the program
! needs: there is no visible IMPLICIT NONE, no declaration of num_args,
! arg_len, status or ind, no assignment to verbose or initial value for
! line_count, and no END IF / END DO / STOP lines closing the constructs
! opened below. Confirm against the complete file before relying on the
! control flow described in these comments.
! MAX_LENGTH sizes every character buffer and bounds the per-character loops.
105 INTEGER , PARAMETER :: MAX_LENGTH = 256
! Inclusive range of accepted character codes.
! NOTE(review): code 127 is DEL, a control character in ASCII -- confirm
! whether the upper bound is meant to be 126.
106 INTEGER , PARAMETER :: FIRST_VALID = 32
107 INTEGER , PARAMETER :: LAST_VALID = 127
! TAB (code 9) is the one accepted code that lies below FIRST_VALID.
108 INTEGER , PARAMETER :: TAB = 9
! Unit number used by the OPEN and READ statements below.
109 INTEGER , PARAMETER :: input_unit = 10
111 ! Information from a single line of the input file.
113 CHARACTER (len=MAX_LENGTH) :: input_string
! NOTE(review): test_variable is not referenced in the visible lines.
114 CHARACTER (LEN=1 ) :: test_variable
116 ! The name of the input file (the file that will be scanned).
119 CHARACTER (LEN=MAX_LENGTH) :: filename
120 INTEGER :: filename_length
! Raw text of the first command-line argument (the verbosity switch).
124 CHARACTER (LEN=MAX_LENGTH) :: verbose_arg
126 INTEGER :: verbose ! from input -v 0=typical for "find" output, just a filename when there are troubles
127 ! from input -V 1=typical for single file searching, specifically: what lines need fixing
128 ! from input -VV 2=typical for debugging this program
! line_count is the number reported with each finding; problem_line_count
! accumulates the findings and drives the end-of-run summary.
132 INTEGER :: line_count, problem_line_count
134 ! Get the command line info.
136 num_args = COMMAND_ARGUMENT_COUNT()
138 ! Check the argument count: exactly two are required.
! Any other count prints the usage text below.
140 IF ( num_args .NE. 2 ) THEN
142 PRINT *,'./a.out <mandatory verbose level> <mandatory filename>'
143 PRINT *,'where <verbose level> is either -v or -V'
144 PRINT *,' -v: outputs the filename only iff offending characters were found'
145 PRINT *,' -V: outputs the line(s) containing the offending characters'
146 PRINT *,'where <filename> is a Fortran source file'
148 PRINT *,'For more information, please read the comments at the top of '
149 PRINT *,"this program's source code: tools/non_ascii_finder.F"
! First argument: the verbosity switch. arg_len receives its length and
! status is nonzero when the retrieval fails.
153 CALL GET_COMMAND_ARGUMENT ( NUMBER=1, VALUE=verbose_arg, LENGTH=arg_len, STATUS=status )
155 IF ( status .NE. 0 ) THEN
156 PRINT *,'The verbose switch is either "-v" or "-V"'
! "-VV" is tested before "-V" because its first two characters would
! otherwise satisfy the "-V" comparison.
! NOTE(review): these are prefix comparisons, so trailing text after the
! switch (for example "-vx") is accepted as well.
160 IF ( verbose_arg(1:3) .EQ. "-VV" ) THEN
162 ELSE IF ( verbose_arg(1:2) .EQ. "-v" ) THEN
164 ELSE IF ( verbose_arg(1:2) .EQ. "-V" ) THEN
! Unrecognized switch: echo exactly what was typed.
167 PRINT *,'The verbose switch is either "-v" or "-V"'
168 PRINT *,'Entered: ',verbose_arg(1:arg_len)
! Second argument: the file to scan. The OPEN is attempted only when the
! argument was retrieved successfully.
172 CALL GET_COMMAND_ARGUMENT ( NUMBER=2, VALUE=filename, LENGTH=filename_length, STATUS=status )
174 IF ( status .EQ. 0 ) THEN
! NOTE(review): some OPEN specifiers are not visible in this listing;
! presumably one of them is IOSTAT=status, which the test after the OPEN
! relies on -- confirm.
175 OPEN ( UNIT = input_unit , &
176 FILE = TRIM(filename) , &
177 ACCESS = 'SEQUENTIAL' , &
179 FORM = 'FORMATTED' , &
180 POSITION = 'ASIS' , &
185 IF ( status .NE. 0 ) THEN
186 PRINT *,'Hmmm, troubles trying to open ',TRIM(filename),' for READ.'
193 problem_line_count = 0
195 ! Loop over each line of the input file.
! Blank the buffer first so characters left over from a longer previous
! record are never examined.
199 DO ind = 1 , MAX_LENGTH
200 input_string(ind:ind) = ' '
! Read one record as plain text.
! NOTE(review): the buffer holds MAX_LENGTH characters, so anything beyond
! that position in a longer record appears to go unchecked -- confirm.
202 READ (input_unit,FMT='(A)',IOSTAT=status) input_string
! A negative status is treated as end of file; it is announced only at
! verbose level 2.
204 IF ( status < 0 ) THEN
205 IF ( verbose .EQ. 2 ) THEN
206 PRINT *,TRIM(filename),', End of file after attempting to read line #',line_count
! Examine every buffer position: a character is flagged when its code is
! outside FIRST_VALID..LAST_VALID and is not TAB.
211 DO ind = 1 , MAX_LENGTH
212 IF ( ( ( ICHAR(input_string(ind:ind)) .LT. FIRST_VALID ) .OR. &
213 ( ICHAR(input_string(ind:ind)) .GT. LAST_VALID ) ) .AND. &
214 ( ICHAR(input_string(ind:ind)) .NE. TAB ) ) THEN
! NOTE(review): this increment sits inside the per-character loop, so the
! counter advances once per offending character rather than once per line,
! although the summary message labels it as "lines".
215 problem_line_count = problem_line_count + 1
! NOTE(review): the statement separating this test from the PRINTs is not
! visible; presumably an ELSE, so that the detail is printed only for
! verbose levels other than 0 -- confirm.
216 IF ( verbose .EQ. 0 ) THEN
218 PRINT *,TRIM(filename)
219 PRINT *,'Found something on line #',line_count
220 PRINT *,TRIM(input_string)
221 PRINT *,'Character # ',ind,' is a ',input_string(ind:ind),', which is character code ',ICHAR(input_string(ind:ind))
226 line_count = line_count + 1
230 ! Summarize the outcome for this file.
! A clean file is announced only at verbose level 2. Otherwise level 1
! prints the counter and the accepted range, and level 0 prints only the
! file name.
! NOTE(review): the ELSE IF branches presumably belong to the outer test
! on problem_line_count, with the inner IF closed on a line not shown --
! confirm.
232 IF ( problem_line_count .EQ. 0 ) THEN
233 IF ( verbose .EQ. 2 ) THEN
234 PRINT *,'OK, File uses only ASCII character codes from ',FIRST_VALID,' through',LAST_VALID
236 ELSE IF ( verbose .EQ. 1 ) THEN
237 PRINT *,'Troubles, with ',problem_line_count,' lines.'
238 PRINT *,'File uses character codes outside the standard ASCII range of ',FIRST_VALID,' to ',LAST_VALID
239 ELSE IF ( verbose .EQ. 0 ) THEN
240 PRINT *,TRIM(filename) ! , problem_line_count
243 END PROGRAM non_ascii_finder